clear all
set more off
cap log close
    
global dir "/Replication Archive"
cd "${dir}"

set seed 151515


		************************************************************************
		************************************************************************
		*** STEP 1:  Insheet contributions data and keep info for matching   ***
		************************************************************************
		************************************************************************
		
	
********************************* 1980 to 2014 	*********************************	
		
forvalues d=1980(2)2014 {

cd "${dir}/Data/DIME_V3"
insheet using "contribDB_`d'.csv", clear

* keep if individual (versus organization or committee) 
keep if contributortype=="I"

* keep variables of interest to facilitate the manual matching
keep transactionid cycle bonicacid contributorlname contributorfname contributormname contributorsuffix contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer

* Clean names
	foreach var in contributorlname contributorfname contributormname contributorsuffix{
		replace `var' = lower(`var')

	* Remove special characters 
		forvalues y = 123/191 {
			replace `var' = subinstr(`var', uchar(`y'), "",.)
		}
		foreach y in 215 216 222 223 240 247 248 254{
			replace `var' = subinstr(`var', uchar(`y'), "",.)
		}

		replace `var' = subinstr(`var', uchar(192),"a",.)
		replace `var' = subinstr(`var', uchar(193),"a",.)
		replace `var' = subinstr(`var', uchar(194),"a",.)
		replace `var' = subinstr(`var', uchar(195),"a",.)
		replace `var' = subinstr(`var', uchar(196),"a",.)
		replace `var' = subinstr(`var', uchar(197),"a",.)
		replace `var' = subinstr(`var', uchar(198),"a",.)
		replace `var' = subinstr(`var', uchar(199),"c",.)
		replace `var' = subinstr(`var', uchar(200),"e",.)
		replace `var' = subinstr(`var', uchar(201),"e",.)
		replace `var' = subinstr(`var', uchar(202),"e",.)
		replace `var' = subinstr(`var', uchar(203),"e",.)
		replace `var' = subinstr(`var', uchar(204),"i",.)
		replace `var' = subinstr(`var', uchar(205),"i",.)
		replace `var' = subinstr(`var', uchar(206),"i",.)
		replace `var' = subinstr(`var', uchar(207),"i",.)
		replace `var' = subinstr(`var', uchar(208),"d",.)
		replace `var' = subinstr(`var', uchar(209),"n",.)
		replace `var' = subinstr(`var', uchar(210),"o",.)
		replace `var' = subinstr(`var', uchar(211),"o",.)
		replace `var' = subinstr(`var', uchar(212),"o",.)
		replace `var' = subinstr(`var', uchar(213),"o",.)
		replace `var' = subinstr(`var', uchar(214),"o",.)
		replace `var' = subinstr(`var', uchar(217),"u",.)
		replace `var' = subinstr(`var', uchar(218),"u",.)
		replace `var' = subinstr(`var', uchar(219),"u",.)
		replace `var' = subinstr(`var', uchar(220),"u",.)
		replace `var' = subinstr(`var', uchar(221),"y",.)
		replace `var' = subinstr(`var', uchar(224),"a",.)
		replace `var' = subinstr(`var', uchar(225),"a",.)
		replace `var' = subinstr(`var', uchar(226),"a",.)
		replace `var' = subinstr(`var', uchar(227),"a",.)
		replace `var' = subinstr(`var', uchar(228),"a",.)
		replace `var' = subinstr(`var', uchar(229),"a",.)
		replace `var' = subinstr(`var', uchar(230),"a",.)
		replace `var' = subinstr(`var', uchar(231),"c",.)
		replace `var' = subinstr(`var', uchar(232),"e",.)
		replace `var' = subinstr(`var', uchar(233),"e",.)
		replace `var' = subinstr(`var', uchar(234),"e",.)
		replace `var' = subinstr(`var', uchar(235),"e",.)
		replace `var' = subinstr(`var', uchar(236),"i",.)
		replace `var' = subinstr(`var', uchar(237),"i",.)
		replace `var' = subinstr(`var', uchar(238),"i",.)
		replace `var' = subinstr(`var', uchar(239),"i",.)
		replace `var' = subinstr(`var', uchar(241),"n",.)
		replace `var' = subinstr(`var', uchar(242),"o",.)
		replace `var' = subinstr(`var', uchar(243),"o",.)
		replace `var' = subinstr(`var', uchar(244),"o",.)
		replace `var' = subinstr(`var', uchar(245),"o",.)
		replace `var' = subinstr(`var', uchar(246),"o",.)
		replace `var' = subinstr(`var', uchar(249),"u",.)
		replace `var' = subinstr(`var', uchar(250),"u",.)
		replace `var' = subinstr(`var', uchar(251),"u",.)
		replace `var' = subinstr(`var', uchar(252),"u",.)
		replace `var' = subinstr(`var', uchar(253),"y",.)
		replace `var' = subinstr(`var', uchar(255),"y",.)

		replace `var' = rtrim(`var')
		replace `var' = ltrim(`var')
		replace `var' = itrim(`var')
		}

replace contributorsuffix=subinstr(contributorsuffix,"ph d", "phd",.)
replace contributorsuffix=subinstr(contributorsuffix,"professor", "prof",.)
replace contributorsuffix=subinstr(contributorsuffix,"doctor", "dr",.)

	* generate the full name of the contributor (first+midname+last+suffix)
	gen contributor_name_1 = contributorfname + " " + contributormname + " " + contributorlname + " " + contributorsuffix

	* generate the full name of the contributor (midname+first+last+suffix)
	gen contributor_name_2 = contributormname + " " + contributorfname + " " + contributorlname + " " + contributorsuffix

	* now generate the full name of the contributor withouth suffix (first+midname+last)
	gen contributor_name_3 = contributorfname + " " + contributormname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix (midname+first+last)
	gen contributor_name_4 = contributormname + " " + contributorfname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix and midname (first+last)
	gen contributor_name_5 = contributorfname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix and midname (midname+last)
	gen contributor_name_6 = contributormname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix (first+last+suffix)
	gen contributor_name_7 = contributorfname + " " + contributorlname + " " + contributorsuffix

* get rid of trailing blanks
foreach y in contributor_name_1 contributor_name_2 contributor_name_3 contributor_name_4 contributor_name_5 contributor_name_6 contributor_name_7 {
replace `y'=rtrim(`y')
replace `y'=ltrim(`y')
replace `y'=itrim(`y')
}

* Clean employer names
foreach x in contributoroccupation contributoremployer {
gen `x'_t=`x'
replace `x'=lower(`x')
replace `x'=subinword(`x', "corp", "",.)
replace `x'=subinword(`x', "inc", "",.)
replace `x'=subinword(`x', "holdings", "",.)
replace `x'=subinword(`x', "holding", "",.)
replace `x'=subinword(`x', "co", "",.)
replace `x'=subinword(`x', "company", "",.)
replace `x'=subinword(`x', "corporation", "",.)
replace `x'=subinword(`x', "industries", "",.)
replace `x'=subinword(`x', "group", "",.)
replace `x'=subinword(`x', "companies", "",.)
replace `x'=subinword(`x', "enterprises", "",.)
replace `x'=subinword(`x', "cos", "",.)
replace `x'=subinword(`x', "ltd", "",.)
replace `x'=subinword(`x', "srl", "",.)
replace `x'=subinword(`x', "spa", "",.)
replace `x'=subinword(`x', "plc", "",.)
replace `x'=subinword(`x', "llc", "",.)
replace `x'=subinword(`x', "sa", "",.)
replace `x'=subinword(`x', "lp", "",.)
replace `x'=subinword(`x', "llp", "",.)
replace `x'=rtrim(`x')
replace `x'=ltrim(`x')
replace `x'=itrim(`x')
replace `x'=`x'_t if `x'=="" & `x'_t!=""
drop `x'_t
}

* run this to erase characters in employer's name that give problems to reclink
forvalues y=33/48 {
for var contributoroccupation contributoremployer: replace X=subinstr(X,char(`y'),"",.)
}
foreach y in 58 59 60 61 62 63 64 91 92 93 94 95 96 {
for var contributoroccupation contributoremployer: replace X=subinstr(X,char(`y'),"",.)
}

foreach x in contributoroccupation contributoremployer {
replace `x'=subinstr(`x',"(","",.)
replace `x'=subinstr(`x',"(","",.)
replace `x'=rtrim(`x')
replace `x'=ltrim(`x')
replace `x'=itrim(`x')
}

save "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", replace
}

********************************* 2016 *********************************
* Here because of the size the dataset is split in smaller pieces xaa,xab, etc

foreach i in a b c d e f g h i j k{
	cd "${dir}/Data/DIME_V3/2016"
	insheet using "xa`i'.csv", clear
	cap rename (v1-v46) (cycle transactionid transactiontype amount date bonicacid contributorname contributorlname contributorfname contributormname contributorsuffix contributortitle contributorffname contributortype contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer iscorp recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype latitude longitude gisconfidence contributordistrict90s contributordistrict00s contributordistrict10s censustract efecmemo efecmemo2 efectransactionidorig bkreftransactionid efecorgorig efeccomidorig efecformtype excludedfromscaling contributorcfscore candidatecfscore)
	keep if contributortype == "I" // only keep individual contributions
	* keep variables of interest to facilitate the manual matching
	keep transactionid cycle bonicacid contributorlname contributorfname contributormname contributorsuffix contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer

	* Clean names
	foreach var in contributorlname contributorfname contributormname contributorsuffix{
		replace `var' = lower(`var')

	* Remove special characters 
		forvalues y = 123/191 {
			replace `var' = subinstr(`var', uchar(`y'), "",.)
		}
		foreach y in 215 216 222 223 240 247 248 254{
			replace `var' = subinstr(`var', uchar(`y'), "",.)
		}

		replace `var' = subinstr(`var', uchar(192),"a",.)
		replace `var' = subinstr(`var', uchar(193),"a",.)
		replace `var' = subinstr(`var', uchar(194),"a",.)
		replace `var' = subinstr(`var', uchar(195),"a",.)
		replace `var' = subinstr(`var', uchar(196),"a",.)
		replace `var' = subinstr(`var', uchar(197),"a",.)
		replace `var' = subinstr(`var', uchar(198),"a",.)
		replace `var' = subinstr(`var', uchar(199),"c",.)
		replace `var' = subinstr(`var', uchar(200),"e",.)
		replace `var' = subinstr(`var', uchar(201),"e",.)
		replace `var' = subinstr(`var', uchar(202),"e",.)
		replace `var' = subinstr(`var', uchar(203),"e",.)
		replace `var' = subinstr(`var', uchar(204),"i",.)
		replace `var' = subinstr(`var', uchar(205),"i",.)
		replace `var' = subinstr(`var', uchar(206),"i",.)
		replace `var' = subinstr(`var', uchar(207),"i",.)
		replace `var' = subinstr(`var', uchar(208),"d",.)
		replace `var' = subinstr(`var', uchar(209),"n",.)
		replace `var' = subinstr(`var', uchar(210),"o",.)
		replace `var' = subinstr(`var', uchar(211),"o",.)
		replace `var' = subinstr(`var', uchar(212),"o",.)
		replace `var' = subinstr(`var', uchar(213),"o",.)
		replace `var' = subinstr(`var', uchar(214),"o",.)
		replace `var' = subinstr(`var', uchar(217),"u",.)
		replace `var' = subinstr(`var', uchar(218),"u",.)
		replace `var' = subinstr(`var', uchar(219),"u",.)
		replace `var' = subinstr(`var', uchar(220),"u",.)
		replace `var' = subinstr(`var', uchar(221),"y",.)
		replace `var' = subinstr(`var', uchar(224),"a",.)
		replace `var' = subinstr(`var', uchar(225),"a",.)
		replace `var' = subinstr(`var', uchar(226),"a",.)
		replace `var' = subinstr(`var', uchar(227),"a",.)
		replace `var' = subinstr(`var', uchar(228),"a",.)
		replace `var' = subinstr(`var', uchar(229),"a",.)
		replace `var' = subinstr(`var', uchar(230),"a",.)
		replace `var' = subinstr(`var', uchar(231),"c",.)
		replace `var' = subinstr(`var', uchar(232),"e",.)
		replace `var' = subinstr(`var', uchar(233),"e",.)
		replace `var' = subinstr(`var', uchar(234),"e",.)
		replace `var' = subinstr(`var', uchar(235),"e",.)
		replace `var' = subinstr(`var', uchar(236),"i",.)
		replace `var' = subinstr(`var', uchar(237),"i",.)
		replace `var' = subinstr(`var', uchar(238),"i",.)
		replace `var' = subinstr(`var', uchar(239),"i",.)
		replace `var' = subinstr(`var', uchar(241),"n",.)
		replace `var' = subinstr(`var', uchar(242),"o",.)
		replace `var' = subinstr(`var', uchar(243),"o",.)
		replace `var' = subinstr(`var', uchar(244),"o",.)
		replace `var' = subinstr(`var', uchar(245),"o",.)
		replace `var' = subinstr(`var', uchar(246),"o",.)
		replace `var' = subinstr(`var', uchar(249),"u",.)
		replace `var' = subinstr(`var', uchar(250),"u",.)
		replace `var' = subinstr(`var', uchar(251),"u",.)
		replace `var' = subinstr(`var', uchar(252),"u",.)
		replace `var' = subinstr(`var', uchar(253),"y",.)
		replace `var' = subinstr(`var', uchar(255),"y",.)

		replace `var' = rtrim(`var')
		replace `var' = ltrim(`var')
		replace `var' = itrim(`var')
		}

	replace contributorsuffix = subinstr(contributorsuffix, "ph d", "phd",.)
	replace contributorsuffix = subinstr(contributorsuffix, "professor", "prof",.)
	replace contributorsuffix = subinstr(contributorsuffix, "doctor", "dr",.)

	* generate the full name of the contributor (first+midname+last+suffix)
	gen contributor_name_1 = contributorfname + " " + contributormname + " " + contributorlname + " " + contributorsuffix

	* generate the full name of the contributor (midname+first+last+suffix)
	gen contributor_name_2 = contributormname + " " + contributorfname + " " + contributorlname + " " + contributorsuffix

	* now generate the full name of the contributor withouth suffix (first+midname+last)
	gen contributor_name_3 = contributorfname + " " + contributormname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix (midname+first+last)
	gen contributor_name_4 = contributormname + " " + contributorfname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix and midname (first+last)
	gen contributor_name_5 = contributorfname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix and midname (midname+last)
	gen contributor_name_6 = contributormname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix (first+last+suffix)
	gen contributor_name_7 = contributorfname + " " + contributorlname + " " + contributorsuffix

	* get rid of trailing blanks
	foreach y in contributor_name_1 contributor_name_2 contributor_name_3 contributor_name_4 contributor_name_5 contributor_name_6 contributor_name_7 {
	replace `y' = rtrim(`y')
	replace `y' = ltrim(`y')
	replace `y' = itrim(`y')
	}

* Clean employer names
foreach x in contributoroccupation contributoremployer {
gen `x'_t=`x'
replace `x'=lower(`x')
replace `x'=subinword(`x', "corp", "",.)
replace `x'=subinword(`x', "inc", "",.)
replace `x'=subinword(`x', "holdings", "",.)
replace `x'=subinword(`x', "holding", "",.)
replace `x'=subinword(`x', "co", "",.)
replace `x'=subinword(`x', "company", "",.)
replace `x'=subinword(`x', "corporation", "",.)
replace `x'=subinword(`x', "industries", "",.)
replace `x'=subinword(`x', "group", "",.)
replace `x'=subinword(`x', "companies", "",.)
replace `x'=subinword(`x', "enterprises", "",.)
replace `x'=subinword(`x', "cos", "",.)
replace `x'=subinword(`x', "ltd", "",.)
replace `x'=subinword(`x', "srl", "",.)
replace `x'=subinword(`x', "spa", "",.)
replace `x'=subinword(`x', "plc", "",.)
replace `x'=subinword(`x', "llc", "",.)
replace `x'=subinword(`x', "sa", "",.)
replace `x'=subinword(`x', "lp", "",.)
replace `x'=subinword(`x', "llp", "",.)
replace `x'=rtrim(`x')
replace `x'=ltrim(`x')
replace `x'=itrim(`x')
replace `x'=`x'_t if `x'=="" & `x'_t!=""
drop `x'_t
}

	* run this to erase characters in employer's name that give problems to reclink
	forvalues y = 33/48 {
		for var contributoroccupation contributoremployer: replace X=subinstr(X,char(`y'),"",.)
	}
	foreach y in 58 59 60 61 62 63 64 91 92 93 94 95 96 {
		for var contributoroccupation contributoremployer: replace X=subinstr(X,char(`y'),"",.)
	}
	foreach x in contributoroccupation contributoremployer {
	replace `x'=subinstr(`x',"(","",.)
	replace `x'=subinstr(`x',"(","",.)
	replace `x'=rtrim(`x')
	replace `x'=ltrim(`x')
	replace `x'=itrim(`x')
	}
	
	compress
	save "xa`i'.dta", replace
}


foreach i in a b c d e f g h i j k{
	use "${dir}/Data/DIME_V3/2016/xa`i'.dta", clear

	cap destring cycle, force replace
	cap destring bonicacid, force replace
	
	save "${dir}/Data/DIME_V3/2016/xa`i'.dta", replace
}

use "${dir}/Data/DIME_V3/2016/xaa.dta", clear

foreach i in b c d e f g h i j k{
	append using "${dir}/Data/DIME_V3/2016/xa`i'.dta", force
	erase "${dir}/Data/DIME_V3/2016/xa`i'.dta"
}

save "${dir}/Data/Contributions/contributions_tomerge_2016.dta", replace

erase "${dir}/Data/DIME_V3/2016/xaa.dta"



********************************* 2018 *********************************
* Here because of the size the dataset is split in smaller pieces xaa,xab, etc

foreach i in a b c d e f g h i j k l m n{
	cd "${dir}/Data/DIME_V3/2018"
	insheet using "xa`i'.csv", clear
	cap rename (v1-v46) (cycle transactionid transactiontype amount date bonicacid contributorname contributorlname contributorfname contributormname contributorsuffix contributortitle contributorffname contributortype contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer iscorp recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype latitude longitude gisconfidence contributordistrict90s contributordistrict00s contributordistrict10s censustract efecmemo efecmemo2 efectransactionidorig bkreftransactionid efecorgorig efeccomidorig efecformtype excludedfromscaling contributorcfscore candidatecfscore)
	keep if contributortype == "I" // only keep individual contributions
	* keep variables of interest to facilitate the manual matching
	keep transactionid cycle bonicacid contributorlname contributorfname contributormname contributorsuffix contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer

	* Clean names
	foreach var in contributorlname contributorfname contributormname contributorsuffix{
		replace `var' = lower(`var')

	* Remove special characters
		forvalues y = 123/191 {
			replace `var' = subinstr(`var', uchar(`y'), "",.)
		}
		foreach y in 215 216 222 223 240 247 248 254{
			replace `var' = subinstr(`var', uchar(`y'), "",.)
		}

		replace `var' = subinstr(`var', uchar(192),"a",.)
		replace `var' = subinstr(`var', uchar(193),"a",.)
		replace `var' = subinstr(`var', uchar(194),"a",.)
		replace `var' = subinstr(`var', uchar(195),"a",.)
		replace `var' = subinstr(`var', uchar(196),"a",.)
		replace `var' = subinstr(`var', uchar(197),"a",.)
		replace `var' = subinstr(`var', uchar(198),"a",.)
		replace `var' = subinstr(`var', uchar(199),"c",.)
		replace `var' = subinstr(`var', uchar(200),"e",.)
		replace `var' = subinstr(`var', uchar(201),"e",.)
		replace `var' = subinstr(`var', uchar(202),"e",.)
		replace `var' = subinstr(`var', uchar(203),"e",.)
		replace `var' = subinstr(`var', uchar(204),"i",.)
		replace `var' = subinstr(`var', uchar(205),"i",.)
		replace `var' = subinstr(`var', uchar(206),"i",.)
		replace `var' = subinstr(`var', uchar(207),"i",.)
		replace `var' = subinstr(`var', uchar(208),"d",.)
		replace `var' = subinstr(`var', uchar(209),"n",.)
		replace `var' = subinstr(`var', uchar(210),"o",.)
		replace `var' = subinstr(`var', uchar(211),"o",.)
		replace `var' = subinstr(`var', uchar(212),"o",.)
		replace `var' = subinstr(`var', uchar(213),"o",.)
		replace `var' = subinstr(`var', uchar(214),"o",.)
		replace `var' = subinstr(`var', uchar(217),"u",.)
		replace `var' = subinstr(`var', uchar(218),"u",.)
		replace `var' = subinstr(`var', uchar(219),"u",.)
		replace `var' = subinstr(`var', uchar(220),"u",.)
		replace `var' = subinstr(`var', uchar(221),"y",.)
		replace `var' = subinstr(`var', uchar(224),"a",.)
		replace `var' = subinstr(`var', uchar(225),"a",.)
		replace `var' = subinstr(`var', uchar(226),"a",.)
		replace `var' = subinstr(`var', uchar(227),"a",.)
		replace `var' = subinstr(`var', uchar(228),"a",.)
		replace `var' = subinstr(`var', uchar(229),"a",.)
		replace `var' = subinstr(`var', uchar(230),"a",.)
		replace `var' = subinstr(`var', uchar(231),"c",.)
		replace `var' = subinstr(`var', uchar(232),"e",.)
		replace `var' = subinstr(`var', uchar(233),"e",.)
		replace `var' = subinstr(`var', uchar(234),"e",.)
		replace `var' = subinstr(`var', uchar(235),"e",.)
		replace `var' = subinstr(`var', uchar(236),"i",.)
		replace `var' = subinstr(`var', uchar(237),"i",.)
		replace `var' = subinstr(`var', uchar(238),"i",.)
		replace `var' = subinstr(`var', uchar(239),"i",.)
		replace `var' = subinstr(`var', uchar(241),"n",.)
		replace `var' = subinstr(`var', uchar(242),"o",.)
		replace `var' = subinstr(`var', uchar(243),"o",.)
		replace `var' = subinstr(`var', uchar(244),"o",.)
		replace `var' = subinstr(`var', uchar(245),"o",.)
		replace `var' = subinstr(`var', uchar(246),"o",.)
		replace `var' = subinstr(`var', uchar(249),"u",.)
		replace `var' = subinstr(`var', uchar(250),"u",.)
		replace `var' = subinstr(`var', uchar(251),"u",.)
		replace `var' = subinstr(`var', uchar(252),"u",.)
		replace `var' = subinstr(`var', uchar(253),"y",.)
		replace `var' = subinstr(`var', uchar(255),"y",.)

		replace `var' = rtrim(`var')
		replace `var' = ltrim(`var')
		replace `var' = itrim(`var')
		}

	replace contributorsuffix = subinstr(contributorsuffix, "ph d", "phd",.)
	replace contributorsuffix = subinstr(contributorsuffix, "professor", "prof",.)
	replace contributorsuffix = subinstr(contributorsuffix, "doctor", "dr",.)

	* generate the full name of the contributor (first+midname+last+suffix)
	gen contributor_name_1 = contributorfname + " " + contributormname + " " + contributorlname + " " + contributorsuffix

	* generate the full name of the contributor (midname+first+last+suffix)
	gen contributor_name_2 = contributormname + " " + contributorfname + " " + contributorlname + " " + contributorsuffix

	* now generate the full name of the contributor withouth suffix (first+midname+last)
	gen contributor_name_3 = contributorfname + " " + contributormname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix (midname+first+last)
	gen contributor_name_4 = contributormname + " " + contributorfname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix and midname (first+last)
	gen contributor_name_5 = contributorfname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix and midname (midname+last)
	gen contributor_name_6 = contributormname + " " + contributorlname

	* now generate the full name of the contributor withouth suffix (first+last+suffix)
	gen contributor_name_7 = contributorfname + " " + contributorlname + " " + contributorsuffix

	* get rid of trailing blanks
	foreach y in contributor_name_1 contributor_name_2 contributor_name_3 contributor_name_4 contributor_name_5 contributor_name_6 contributor_name_7 {
	replace `y' = rtrim(`y')
	replace `y' = ltrim(`y')
	replace `y' = itrim(`y')
	}

* Clean employer names
foreach x in contributoroccupation contributoremployer {
gen `x'_t=`x'
replace `x'=lower(`x')
replace `x'=subinword(`x', "corp", "",.)
replace `x'=subinword(`x', "inc", "",.)
replace `x'=subinword(`x', "holdings", "",.)
replace `x'=subinword(`x', "holding", "",.)
replace `x'=subinword(`x', "co", "",.)
replace `x'=subinword(`x', "company", "",.)
replace `x'=subinword(`x', "corporation", "",.)
replace `x'=subinword(`x', "industries", "",.)
replace `x'=subinword(`x', "group", "",.)
replace `x'=subinword(`x', "companies", "",.)
replace `x'=subinword(`x', "enterprises", "",.)
replace `x'=subinword(`x', "cos", "",.)
replace `x'=subinword(`x', "ltd", "",.)
replace `x'=subinword(`x', "srl", "",.)
replace `x'=subinword(`x', "spa", "",.)
replace `x'=subinword(`x', "plc", "",.)
replace `x'=subinword(`x', "llc", "",.)
replace `x'=subinword(`x', "sa", "",.)
replace `x'=subinword(`x', "lp", "",.)
replace `x'=subinword(`x', "llp", "",.)
replace `x'=rtrim(`x')
replace `x'=ltrim(`x')
replace `x'=itrim(`x')
replace `x'=`x'_t if `x'=="" & `x'_t!=""
drop `x'_t
}

	* run this to erase characters in employer's name that give problems to reclink
	forvalues y = 33/48 {
		for var contributoroccupation contributoremployer: replace X=subinstr(X,char(`y'),"",.)
	}
	foreach y in 58 59 60 61 62 63 64 91 92 93 94 95 96 {
		for var contributoroccupation contributoremployer: replace X=subinstr(X,char(`y'),"",.)
	}
	foreach x in contributoroccupation contributoremployer {
	replace `x'=subinstr(`x',"(","",.)
	replace `x'=subinstr(`x',"(","",.)
	replace `x'=rtrim(`x')
	replace `x'=ltrim(`x')
	replace `x'=itrim(`x')
	}
	
	compress
	save "xa`i'.dta", replace
}


foreach i in a b c d e f g h i j k l m n{
	use "${dir}/Data/DIME_V3/2018/xa`i'.dta", clear

	cap destring cycle, force replace
	cap destring bonicacid, force replace
	
	save "${dir}/Data/DIME_V3/2018/xa`i'.dta", replace
}

use "${dir}/Data/DIME_V3/2018/xaa.dta", clear

foreach i in b c d e f g h i j k l m n{
	append using "${dir}/Data/DIME_V3/2018/xa`i'.dta", force
	erase "${dir}/Data/DIME_V3/2018/xa`i'.dta"
}

save "${dir}/Data/Contributions/contributions_tomerge_2018.dta", replace

erase "${dir}/Data/DIME_V3/2018/xaa.dta"




		************************************************************************
		************************************************************************
		*** STEP 2:  Insheet Data on Corporate Executives from Boardex       ***
		************************************************************************
		************************************************************************

set excelxlsxlargefile on

* Board composition in the "fully analyzed" companies
import excel "${dir}/Data/Boardex/Boardex original/NA - Board Summary/NA - Board Summary - 1.xlsx", cellrange(A2) first clear
save "${dir}/Data/Boardex/Board Summary/Board_Summary_1.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Board Summary/NA - Board Summary - 2.xlsx", cellrange(A2) first clear 
save "${dir}/Data/Boardex/Board Summary/Board_Summary_2.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Board Summary/NA - Board Summary - 3.xlsx", cellrange(A2) first clear
save "${dir}/Data/Boardex/Board Summary/Board_Summary_3.dta", replace

* Other senior management in the "fully analyzed" companies
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Org Summary/NA - SMDEs Org Summary - 1", cellrange(A2) first clear
save "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_1.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Org Summary/NA - SMDEs Org Summary - 2", cellrange(A2) first clear
save "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_2.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Org Summary/NA - SMDEs Org Summary - 3", cellrange(A2) first clear
save "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_3.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Org Summary/NA - SMDEs Org Summary - 4", cellrange(A2) first clear
save "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_4.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Org Summary/NA - SMDEs Org Summary - 5", cellrange(A2) first clear
save "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_5.dta", replace

* Employment history of all directors entering the "fully analyzed" companies database
import excel "${dir}/Data/Boardex/Boardex original/NA - Director Profile/NA - Director Profile - Employment Current Board.xlsx", first clear
save "${dir}/Data/Boardex/Director Profile/Director_Current_Board.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Director Profile/NA - Director Profile - Employment Historical Board - 1.xlsx", first clear
save "${dir}/Data/Boardex/Director Profile/Director_Historic_Board_1.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Director Profile/NA - Director Profile - Employment Historical Board - 2.xlsx", first clear
save "${dir}/Data/Boardex/Director Profile/Director_Historic_Board_2.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Director Profile/NA - Director Profile - Employment Current NonBoard.xlsx", first clear
save "${dir}/Data/Boardex/Director Profile/Director_Current_NONBoard.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Director Profile/NA - Director Profile - Employment Historical NonBoard - 1.xlsx", first clear
save "${dir}/Data/Boardex/Director Profile/Director_Historic_NONBoard_1.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/NA - Director Profile/NA - Director Profile - Employment Historical NonBoard - 2.xlsx", first clear
save "${dir}/Data/Boardex/Director Profile/Director_Historic_NONBoard_2.dta", replace

* Employment history of all senior management entering the "fully analyzed" companies database
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Current Board.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Current_Board.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Current NonBoard.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Current_NONBoard.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Historical Board.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Historic_Board.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Historical NonBoard - 1.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Historic_NONBoard1.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Historical NonBoard - 2.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Historic_NONBoard2.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Historical NonBoard - 3.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Historic_NONBoard3.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Historical NonBoard - 4.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Historic_NONBoard4.dta", replace
import excel "${dir}/Data/Boardex/Boardex original/SMDEs Profile/NA - SMDEs Profile - Employment Historical NonBoard - 5.xlsx", first clear
save "${dir}/Data/Boardex/SMDEs Profile/SMDEs_Historic_NONBoard5.dta", replace


		*********************************************************************************************
		*********************************************************************************************
		*** STEP 3:  Create datasets of directors and SM to be merged to contributions data       ***
		*********************************************************************************************
		*********************************************************************************************

* Board members in the "fully analyzed" companies
forvalues i=1(1)3 {
use "${dir}/Data/Boardex/Board Summary/Board_Summary_`i'.dta", clear
rename Ticker CompanyTicker
rename ISIN CompanyISIN
rename Index CompanyIndex
keep AnnualReportYear Country Sector CompanyName CompanyID CompanyISIN CompanyTicker CompanyIndex DirectorTypeEDorSD IndividualName DirectorID NationalityMix TotalDirectorsontheBoard NumberofSDsonBoard
keep if Country=="United States"
drop Country
save "temp_`i'.dta", replace
}
use "temp_1.dta"
append using "temp_2.dta"
append using "temp_3.dta"
forvalues i=1(1)3 {
erase "temp_`i'.dta"
}

duplicates drop AnnualReportYear CompanyID DirectorID DirectorTypeEDorSD, force
bys AnnualReportYear CompanyID DirectorID: gen N=_N
/*
tab N
          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    914,954       99.87       99.87
          2 |      1,168        0.13      100.00
------------+-----------------------------------
      Total |    916,122      100.00

* A small number of directors compare both as executive and supervisory director
* Keep the supervisory role
*/
drop if DirectorTypeEDorSD=="ED" & N==2
drop N
bys AnnualReportYear CompanyID DirectorID: gen N=_N
/*
tab N

          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    915,538      100.00      100.00
------------+-----------------------------------
      Total |    915,538      100.00

* Good: a unique director for each firm-year-director
*/
drop N

* Indicator for members of the board
gen Board=1
label var Board "Member of the Board"

* Keep only the Senior Management of these firms not sitting in the board (there are also the individuals who have both executive roles and sit on the board, but these are already in Board_Summary.dta)
forvalues i=1(1)5 {
preserve
use "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_`i'.dta", clear
rename Ticker CompanyTicker
rename ISIN CompanyISIN
rename Index CompanyIndex
keep AnnualReportYear Country Sector CompanyName CompanyID CompanyISIN CompanyTicker CompanyIndex DirectorTypeEDSDorSM IndividualName DirectorID NationalityMix IndividualRole
keep if Country=="United States"
drop Country
keep if DirectorTypeEDSDorSM =="SM"
drop DirectorTypeEDSDorSM
duplicates drop AnnualReportYear CompanyID DirectorID, force
save "temp_`i'.dta", replace
restore
}

* Append everything together
append using "temp_1.dta"
append using "temp_2.dta"
append using "temp_3.dta"
append using "temp_4.dta"
append using "temp_5.dta"

replace Board=0 if Board==.
bys AnnualReportYear CompanyID: egen max=max(TotalDirectorsontheBoard)
replace TotalDirectorsontheBoard=max if TotalDirectorsontheBoard==.
drop max
bys AnnualReportYear CompanyID: egen max=max(NumberofSDsonBoard)
replace NumberofSDsonBoard=max if NumberofSDsonBoard==.
drop max
erase "temp_1.dta"
erase "temp_2.dta"
erase "temp_3.dta"
erase "temp_4.dta"
erase "temp_5.dta"

* Drop 204 director-year-firm triples that appear twice since both board member and in senior mgmt data: keep board member observation
bys AnnualReportYear CompanyID DirectorID: gen N=_N
drop if N==2 & Board==0
drop N

* Save dataset with the role of the individual in the firm
save "${dir}/Data/Boardex/Directors_to_Match.dta", replace



***********************
***** Clean names *****
***********************

replace IndividualName=lower(IndividualName)
foreach y in IndividualName {
replace `y'=rtrim(`y')
replace `y'=ltrim(`y')
replace `y'=itrim(`y')
}
forvalues y = 123/191 {
	replace IndividualName = subinstr(IndividualName, uchar(`y'), "",.)
}
foreach y in 215 216 222 223 240 247 248 254{
	replace IndividualName = subinstr(IndividualName, uchar(`y'), "",.)
}
replace IndividualName = subinstr(IndividualName, ",", "",.)
replace IndividualName = subinstr(IndividualName, ".", "",.)
replace IndividualName = subinstr(IndividualName, "'", "",.)
foreach var in IndividualName{
	replace `var' = subinstr(`var', uchar(192),"a",.)
	replace `var' = subinstr(`var', uchar(193),"a",.)
	replace `var' = subinstr(`var', uchar(194),"a",.)
	replace `var' = subinstr(`var', uchar(195),"a",.)
	replace `var' = subinstr(`var', uchar(196),"a",.)
	replace `var' = subinstr(`var', uchar(197),"a",.)
	replace `var' = subinstr(`var', uchar(198),"a",.)
	replace `var' = subinstr(`var', uchar(199),"c",.)
	replace `var' = subinstr(`var', uchar(200),"e",.)
	replace `var' = subinstr(`var', uchar(201),"e",.)
	replace `var' = subinstr(`var', uchar(202),"e",.)
	replace `var' = subinstr(`var', uchar(203),"e",.)
	replace `var' = subinstr(`var', uchar(204),"i",.)
	replace `var' = subinstr(`var', uchar(205),"i",.)
	replace `var' = subinstr(`var', uchar(206),"i",.)
	replace `var' = subinstr(`var', uchar(207),"i",.)
	replace `var' = subinstr(`var', uchar(208),"d",.)
	replace `var' = subinstr(`var', uchar(209),"n",.)
	replace `var' = subinstr(`var', uchar(210),"o",.)
	replace `var' = subinstr(`var', uchar(211),"o",.)
	replace `var' = subinstr(`var', uchar(212),"o",.)
	replace `var' = subinstr(`var', uchar(213),"o",.)
	replace `var' = subinstr(`var', uchar(214),"o",.)
	replace `var' = subinstr(`var', uchar(217),"u",.)
	replace `var' = subinstr(`var', uchar(218),"u",.)
	replace `var' = subinstr(`var', uchar(219),"u",.)
	replace `var' = subinstr(`var', uchar(220),"u",.)
	replace `var' = subinstr(`var', uchar(221),"y",.)
	replace `var' = subinstr(`var', uchar(224),"a",.)
	replace `var' = subinstr(`var', uchar(225),"a",.)
	replace `var' = subinstr(`var', uchar(226),"a",.)
	replace `var' = subinstr(`var', uchar(227),"a",.)
	replace `var' = subinstr(`var', uchar(228),"a",.)
	replace `var' = subinstr(`var', uchar(229),"a",.)
	replace `var' = subinstr(`var', uchar(230),"a",.)
	replace `var' = subinstr(`var', uchar(231),"c",.)
	replace `var' = subinstr(`var', uchar(232),"e",.)
	replace `var' = subinstr(`var', uchar(233),"e",.)
	replace `var' = subinstr(`var', uchar(234),"e",.)
	replace `var' = subinstr(`var', uchar(235),"e",.)
	replace `var' = subinstr(`var', uchar(236),"i",.)
	replace `var' = subinstr(`var', uchar(237),"i",.)
	replace `var' = subinstr(`var', uchar(238),"i",.)
	replace `var' = subinstr(`var', uchar(239),"i",.)
	replace `var' = subinstr(`var', uchar(241),"n",.)
	replace `var' = subinstr(`var', uchar(242),"o",.)
	replace `var' = subinstr(`var', uchar(243),"o",.)
	replace `var' = subinstr(`var', uchar(244),"o",.)
	replace `var' = subinstr(`var', uchar(245),"o",.)
	replace `var' = subinstr(`var', uchar(246),"o",.)
	replace `var' = subinstr(`var', uchar(249),"u",.)
	replace `var' = subinstr(`var', uchar(250),"u",.)
	replace `var' = subinstr(`var', uchar(251),"u",.)
	replace `var' = subinstr(`var', uchar(252),"u",.)
	replace `var' = subinstr(`var', uchar(253),"y",.)
	replace `var' = subinstr(`var', uchar(255),"y",.)
}
foreach y in IndividualName {
	replace `y' = rtrim(`y')
	replace `y' = ltrim(`y')
	replace `y' = itrim(`y')
}

* A few typos with the nicknames
replace IndividualName="baron gualthaus (guup) kraijenhoff" if IndividualName=="baron ((guup) gualthaus baron) kraijenhoff"
replace IndividualName="palmer (pete) edward retzlaff" if IndividualName=="baron ((pete) palmer edward) retzlaff"
replace IndividualName="doctor timothy (tim) taylor" if IndividualName=="doctor timothy ((tim)) taylor"
replace IndividualName="lord christopher (chris) wellesley" if IndividualName=="lord ((chris) christopher) wellesley"
replace IndividualName="lord henry dennistoun (dennis) stevenson of coddenham" if IndividualName=="lord ((dennis) henry dennistoun) stevenson of coddenham"
replace IndividualName="lord james (jim) blyth of rowington" if IndividualName=="lord ((jim) james) blyth of rowington"
replace IndividualName="lord terence james (jim) baron oneill of gatley" if IndividualName=="lord ((jim) terence james) baron oneill of gatley"

* get nickname
gen str nickname = regexs(0) if regexm(IndividualName,"[(][a-z]+[)]")
replace nickname = regexs(0) if regexm(IndividualName,"[(][a-z]+[ ][a-z]+[)]") & nickname == ""
replace nickname = regexs(0) if regexm(IndividualName,"[(][a-z]+[-][a-z]+[)]") & nickname == ""

replace nickname = itrim(nickname)
replace nickname = ltrim(nickname)
replace nickname = rtrim(nickname)

* get suffix
preserve
use "${dir}/Data/AUX/suffix.dta", clear
levelsof suffixtitle, local(levels)
restore
gen suffix=""

foreach l of local levels {
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"[ ](`l')[ ]") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"^(`l')[ ]") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"[ ](`l')$") 
}
foreach l in count ambassador brigadier captain reverend admiral dr sir commander colonel excellency senator baroness baron lord doctor prof professor major {
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"^(`l')[ ]") 
}
foreach l in sr jr ii iii iv v vi vii viii ix x {
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"[ ](`l')$") 
}
replace suffix=itrim(suffix)
replace suffix=ltrim(suffix)
replace suffix=rtrim(suffix)

replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(the hon)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(the hon doctor)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(the rt hon)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(the rt hon lord)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(professor doctor)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(rear admiral)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(brigadier general)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(major general)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(vice admiral)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(lt gen)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(lt colonel)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(air vice marshal)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(chief justice)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(general sir)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(h excellency)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(his highness)") 
replace suffix=suffix+" "+regexs(0) if regexm(IndividualName,"(lieutenant commander)") 

replace suffix=subinstr(suffix,"professor professor doctor","doctor professor",.)
replace suffix=subinstr(suffix,"the hon the hon doctor","the hon doctor",.)
replace suffix=subinstr(suffix,"major major general","major general",.)
replace suffix=subinstr(suffix,"brigadier brigadier general","brigadier general",.)
replace suffix=subinstr(suffix,"professor jr professor doctor","jr doctor professor",.)
replace suffix=subinstr(suffix,"professor iii professor doctor","iii doctor professor",.)
replace suffix=subinstr(suffix,"major jr major general","jr major general",.)
replace suffix=subinstr(suffix,"the rt hon the rt hon lord","the rt hon lord",.)
replace suffix=subinstr(suffix,"brigadier jr brigadier general","jr brigadier general",.)
replace suffix=subinstr(suffix,"jr the hon the hon doctor","jr the hon doctor",.)
replace suffix=subinstr(suffix,"major sr major general","sr major general",.)
replace suffix=subinstr(suffix,"major ii major general","ii major general",.)
replace suffix=subinstr(suffix,"major iii major general","iii major general",.)
replace suffix=subinstr(suffix,"iv the hon the hon doctor","iv the hon doctor",.)
replace suffix=subinstr(suffix,"iii the hon the hon doctor","iii the hon doctor",.)
replace suffix=subinstr(suffix,"brigadier sr brigadier general","sr brigadier general",.)
replace suffix=subinstr(suffix,"professor ii professor doctor","ii doctor professor",.)

replace suffix=itrim(suffix)
replace suffix=ltrim(suffix)
replace suffix=rtrim(suffix)

* strip name of nickname and suffix
gen temp_name=regexr(IndividualName,"[(][a-z]+[)]"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"[(][a-z]+[ ][a-z]+[)]"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"[(][a-z]+[-][a-z]+[)]"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
preserve
use "${dir}/Data/AUX/suffix.dta", clear
levelsof suffixtitle, local(levels)
restore
foreach l of local levels {
replace temp_name=regexr(temp_name,"[ ](`l')[ ]"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"^(`l')[ ]"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"[ ](`l')$"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
}
replace temp_name=regexr(temp_name,"(the rt hon lord)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(the hon doctor)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(the rt hon)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(the hon)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(professor doctor)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)

replace temp_name=regexr(temp_name,"(rear admiral)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(brigadier general)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(major general)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(vice admiral)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(lt gen)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(lt colonel)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(air vice marshal)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(chief justice)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(general sir)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(h excellency)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(his highness)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
replace temp_name=regexr(temp_name,"(lieutenant commander)"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)

foreach l in reverend admiral dr sir commander colonel excellency senator baroness baron lord doctor prof professor {
replace temp_name=regexr(temp_name,"^(`l')[ ]"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
}
foreach l in sr jr ii iii iv v vi vii viii ix {
replace temp_name=regexr(temp_name,"[ ](`l')$"," ")
replace temp_name=itrim(temp_name)
replace temp_name=ltrim(temp_name)
replace temp_name=rtrim(temp_name)
}

replace nickname=subinstr(nickname,"(","",.)
replace nickname=subinstr(nickname,")","",.)
replace nickname=rtrim(nickname)
replace nickname=ltrim(nickname)
replace nickname=itrim(nickname)

* Get midname(s), last name, and first name
split temp_name, p(" ")
* first one is first name
rename temp_name1 first_name
* last one is last name
gen last_name=""
replace last_name=temp_name10 if temp_name10!="" 
replace last_name=temp_name9 if temp_name10=="" & temp_name9!="" 
replace last_name=temp_name8 if temp_name9=="" & temp_name8!="" 
replace last_name=temp_name7 if temp_name8=="" & temp_name7!="" 
replace last_name=temp_name6 if temp_name7=="" & temp_name6!="" 
replace last_name=temp_name5 if temp_name6=="" & temp_name5!="" 
replace last_name=temp_name4 if temp_name5=="" & temp_name4!="" 
replace last_name=temp_name3 if temp_name4=="" & temp_name3!="" 
replace last_name=temp_name2 if temp_name3=="" & temp_name2!="" 
* the remaining is the middle name(s)
gen midname=""
replace midname=temp_name2+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6+" "+temp_name7+" "+temp_name8+" "+temp_name9 if temp_name10!="" 
replace midname=temp_name2+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6+" "+temp_name7+" "+temp_name8 if temp_name9!="" & temp_name10==""
replace midname=temp_name2+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6+" "+temp_name7 if temp_name8!="" & temp_name9==""
replace midname=temp_name2+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6 if temp_name7!="" & temp_name8==""
replace midname=temp_name2+" "+temp_name3+" "+temp_name4+" "+temp_name5 if temp_name6!="" & temp_name7==""
replace midname=temp_name2+" "+temp_name3+" "+temp_name4 if temp_name5!="" & temp_name6==""
replace midname=temp_name2+" "+temp_name3 if temp_name4!="" & temp_name5==""
replace midname=temp_name2 if temp_name3!="" & temp_name4==""

* if first has 1 letter and nick!="" & mid!="", nickname is for first  midname
gen l_first=length(first_name)
gen nickname_midname=""
replace nickname_midname=nickname+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6+" "+temp_name7+" "+temp_name8+" "+temp_name9 if temp_name10!="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6+" "+temp_name7+" "+temp_name8 if temp_name9!="" & temp_name10=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6+" "+temp_name7 if temp_name8!="" & temp_name9=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname+" "+temp_name3+" "+temp_name4+" "+temp_name5+" "+temp_name6 if temp_name7!="" & temp_name8=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname+" "+temp_name3+" "+temp_name4+" "+temp_name5 if temp_name6!="" & temp_name7=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname+" "+temp_name3+" "+temp_name4 if temp_name5!="" & temp_name6=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname+" "+temp_name3 if temp_name4!="" & temp_name5=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname_midname=nickname if temp_name3!="" & temp_name4=="" & l_first ==1 & nickname !="" & midname !=""
replace nickname="" if nickname_midname!=""

foreach x in first_name midname nickname_midname last_name {
replace `x'=itrim(`x')
replace `x'=ltrim(`x')
replace `x'=rtrim(`x')
}

replace suffix=subinstr(suffix,"ph d", "phd",.)
replace suffix=subinstr(suffix,"professor", "prof",.)
replace suffix=subinstr(suffix,"doctor", "dr",.)

* generate the full name of the director (first+midname+last+suffix)
gen director_name_1=first_name+" "+midname+" "+last_name+" "+suffix
replace director_name_1=lower(director_name_1)

* generate the full name of the director (nickname+midname+last+suffix)
gen director_name_2=nickname+" "+midname+" "+last_name+" "+suffix
* if nickname is for midname:
replace director_name_2=first_name+" "+nickname_midname+" "+last_name+" "+suffix if nickname_midname!=""
replace director_name_2=lower(director_name_2)

* now generate the full name of the director withouth suffix (first+midname+last)
gen director_name_3=first_name+" "+midname+" "+last_name
replace director_name_3=lower(director_name_3)

* now generate the full name of the director withouth suffix (nickname+midname+last)
gen director_name_4=nickname+" "+midname+" "+last_name
* if nickname is for midname:
replace director_name_4=first_name+" "+nickname_midname+" "+last_name if nickname_midname!=""
replace director_name_4=lower(director_name_4)

* now generate the full name of the contributor withouth suffix and midname (first+last)
gen director_name_5=first_name+" "+last_name
* if first has one letter, then use midname
replace director_name_5=midname+" "+last_name if l_first==1
replace director_name_5=lower(director_name_5)

* now generate the full name of the contributor withouth suffix and midname (nickname+last)
gen director_name_6=nickname+" "+last_name
* if first has one letter, then use nickname for midname
replace director_name_6=nickname_midname+" "+last_name if l_first==1 & nickname_midname!=""
replace director_name_6=lower(director_name_6)

* now generate the full name of the contributor withouth midname (first+last+suffix)
gen director_name_7=first_name+" "+last_name+" "+suffix
replace director_name_7=lower(director_name_7)
* if first has one letter, then use midname
replace director_name_7=midname+" "+last_name+" "+suffix if l_first==1
replace director_name_7=lower(director_name_7)

* now generate the full name of the contributor withouth midname (nickname+last+suffix)
gen director_name_8=nickname+" "+last_name+" "+suffix
* if first has one letter, then use nickname for midname
replace director_name_8=nickname_midname+" "+last_name+" "+suffix if nickname_midname!=""
replace director_name_8=lower(director_name_8)

* get rid of trailing blanks
foreach y in director_name_1 director_name_2 director_name_3 director_name_4 director_name_5 director_name_6 director_name_7 director_name_8 {
replace `y'=rtrim(`y')
replace `y'=ltrim(`y')
replace `y'=itrim(`y')
}


/* check: do we have a unique director_name across the observations with same director_id?
preserve
duplicates drop DirectorID IndividualName, force
bys DirectorID: gen N=_N
tab N
restore

          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    419,766      100.00      100.00
------------+-----------------------------------
      Total |    419,766      100.00

*/

* Save dataset
save "${dir}/Data/Boardex/Directors_to_Match.dta", replace




		*********************************************************************************************
		*********************************************************************************************
		*** STEP 4:  Get for all these directors all the companies they have ever worked for      ***
		*********************************************************************************************
		*********************************************************************************************

		
foreach x in Director_Current_Board Director_Historic_Board_1 Director_Historic_Board_2 Director_Current_NONBoard Director_Historic_NONBoard_1 Director_Historic_NONBoard_2 {	
	use "${dir}/Data/Boardex/Director Profile/`x'.dta", clear
		drop if DirectorName=="UnKnown"
		keep DirectorID CompanyID CompanyName
		duplicates drop DirectorID CompanyID, force
		destring DirectorID, replace
		save "${dir}/Data/Boardex/Director Profile/`x'_temp.dta", replace
		}
		
foreach x in SMDEs_Current_Board SMDEs_Historic_Board SMDEs_Current_NONBoard SMDEs_Historic_NONBoard1 SMDEs_Historic_NONBoard2 SMDEs_Historic_NONBoard3 SMDEs_Historic_NONBoard4 SMDEs_Historic_NONBoard5 {		
		use "${dir}/Data/Boardex/SMDEs Profile/`x'.dta", clear
		drop if DirectorName=="UnKnown"
		keep DirectorID CompanyID CompanyName
		duplicates drop DirectorID CompanyID, force
		destring DirectorID, replace
		save "${dir}/Data/Boardex/SMDEs Profile/`x'_temp.dta", replace
		}
		
use "${dir}/Data/Boardex/Directors_to_Match.dta", clear		
keep CompanyName CompanyID DirectorID IndividualName director_name_*		
		
foreach x in Director_Current_Board Director_Historic_Board_1 Director_Historic_Board_2 Director_Current_NONBoard Director_Historic_NONBoard_1 Director_Historic_NONBoard_2 {	
		append using "${dir}/Data/Boardex/Director Profile/`x'_temp.dta"
		}
foreach x in SMDEs_Current_Board SMDEs_Historic_Board SMDEs_Current_NONBoard SMDEs_Historic_NONBoard1 SMDEs_Historic_NONBoard2 SMDEs_Historic_NONBoard3 SMDEs_Historic_NONBoard4 SMDEs_Historic_NONBoard5 {		
		append using "${dir}/Data/Boardex/SMDEs Profile/`x'_temp.dta"
		}		
foreach x in Director_Current_Board Director_Historic_Board_1 Director_Historic_Board_2 Director_Current_NONBoard Director_Historic_NONBoard_1 Director_Historic_NONBoard_2 {	
		erase "${dir}/Data/Boardex/Director Profile/`x'_temp.dta"
		}
foreach x in SMDEs_Current_Board SMDEs_Historic_Board SMDEs_Current_NONBoard SMDEs_Historic_NONBoard1 SMDEs_Historic_NONBoard2 SMDEs_Historic_NONBoard3 SMDEs_Historic_NONBoard4 SMDEs_Historic_NONBoard5 {		
		erase "${dir}/Data/Boardex/SMDEs Profile/`x'_temp.dta"
		}		
		
foreach x in IndividualName director_name_1 director_name_2 director_name_3 director_name_4 director_name_5 director_name_6 director_name_7 director_name_8 {
spread `x', by(DirectorID)
}		
		
		
*******************************
***** Clean company names *****
*******************************

foreach x in CompanyName {
replace `x'=lower(`x')
replace `x'=rtrim(`x')
replace `x'=ltrim(`x')
replace `x'=itrim(`x')
}

gen CompanyName_Orig=CompanyName

* Alternative company names
gen str CompanyName2=regexs(0) if regexm(CompanyName,"(\().+( prior to)")
replace CompanyName2=subinstr(CompanyName2,"prior to","",.)
replace CompanyName2=subinstr(CompanyName2,"(","",.)
replace CompanyName2=subinstr(CompanyName2,")","",.)

gen str CompanyName3=regexs(0) if regexm(CompanyName,"(formerly known as).+(\) \()")
replace CompanyName3=subinstr(CompanyName3,"(","",.)
replace CompanyName3=subinstr(CompanyName3,")","",.)
replace CompanyName3=subinstr(CompanyName3,"formerly known as","",.)

* Clean main company name
replace CompanyName=regexr(CompanyName, "[(].+( prior to).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](formerly known as).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](de-listed).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](delisted).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](ceased trading).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](listed).+[)]"," ")

* Alternative company name
gen temp=CompanyName
split temp, parse("(") generate(CompanyName_temp)
drop temp
rename CompanyName_temp1 CompanyName1
rename CompanyName_temp2 CompanyName4
rename CompanyName_temp3 CompanyName5
rename CompanyName_temp4 CompanyName6
rename CompanyName_temp5 CompanyName7
rename CompanyName company_name
 
 * Clean all these names
forvalues x=1(1)7 {
replace CompanyName`x'=lower(CompanyName`x')
replace CompanyName`x'=rtrim(CompanyName`x')
replace CompanyName`x'=ltrim(CompanyName`x')
replace CompanyName`x'=itrim(CompanyName`x')
replace CompanyName`x'=subinstr(CompanyName`x',"(","",.)
replace CompanyName`x'=subinstr(CompanyName`x',")","",.)
replace CompanyName`x'="" if CompanyName`x'=="the"
replace CompanyName`x'=regexr(CompanyName`x',"^(aka)[ ]"," ")
}
forvalues x=1(1)7 {
replace CompanyName`x'=rtrim(CompanyName`x')
replace CompanyName`x'=ltrim(CompanyName`x')
replace CompanyName`x'=itrim(CompanyName`x')
gen CompanyName`x'_t=CompanyName`x'
replace CompanyName`x'=subinword(CompanyName`x', "corp", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "inc", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "holdings", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "holding", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "co", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "company", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "corporation", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "industries", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "group", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "companies", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "enterprises", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "cos", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "ltd", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "srl", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "spa", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "plc", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "llc", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "sa", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "lp", "",.)
replace CompanyName`x'=subinword(CompanyName`x', "llp", "",.)
replace CompanyName`x'=rtrim(CompanyName`x')
replace CompanyName`x'=ltrim(CompanyName`x')
replace CompanyName`x'=itrim(CompanyName`x')
replace CompanyName`x'=CompanyName`x'_t if CompanyName`x'=="" & CompanyName`x'_t!=""
drop CompanyName`x'_t
}

forvalues z=1(1)7 {
forvalues x=33/39 {
replace CompanyName`z'=subinstr(CompanyName`z',char(`x'),"",.)
}
foreach x in 40 41 42 43 44 45 46 47 96 {
replace CompanyName`z'=subinstr(CompanyName`z',char(`x'),"",.)
}
}

forvalues x=1(1)7 {
replace CompanyName`x'=lower(CompanyName`x')
replace CompanyName`x'=rtrim(CompanyName`x')
replace CompanyName`x'=ltrim(CompanyName`x')
replace CompanyName`x'=itrim(CompanyName`x')
}

* Reshape data
gen n=_n
preserve
keep n CompanyName1 CompanyName2 CompanyName3 CompanyName4 CompanyName5 CompanyName6 CompanyName7 
reshape long CompanyName, i(n) j(num)
drop if CompanyName==""
duplicates drop  CompanyName n, force
drop num
bys n: gen num=_n
reshape wide CompanyName, i(n) j(num)
tempfile temp
save "`temp'" , replace
restore

keep CompanyID DirectorID IndividualName director_name_* CompanyName_Orig n
merge 1:1 n using "`temp'"
drop _m
drop n

* Drop those that we do not need to match (core sample foreign firms)
preserve
use "${dir}/Data/Boardex/Directors_to_Match.dta", clear
keep DirectorID
duplicates drop DirectorID, force
unique DirectorID
save temp, replace
restore
merge m:1 DirectorID using temp
keep if _m==3
drop _m
erase temp.dta

* Keep only one Director-Company combination
duplicates drop DirectorID CompanyID, force

* Get info on whether director has midname, suffix, or nickname
preserve
use "${dir}/Data/Boardex/Directors_to_Match.dta", clear
duplicates drop DirectorID, force
keep midname suffix nickname nickname_midname DirectorID
save temp, replace
restore 
merge m:1 DirectorID using temp
drop _m
erase temp.dta

* Save dataset for the matching
save "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", replace



		*********************************************************************************************
		*********************************************************************************************
		*** 				STEP 5:  Match directors to contributions     			  			  ***
		*********************************************************************************************
		*********************************************************************************************


************************************************************		
***** STEP 1: PERFECT MATCH BY NAME AND COMPANY NAME   *****		
************************************************************		
		
**********************************************************************************
*** STEP 1A: exploit information on midname and suffix for those who have them *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!="" & suffix!=""
keep DirectorID CompanyID director_name_1 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_1.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!="" & suffix!=""
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_2 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_2.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!="" & contributorsuffix!=""
keep contributor_name_1 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_1_`d'.dta", replace		
restore
}
forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!="" & contributorsuffix!=""
keep contributor_name_2 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_2_`d'.dta", replace		
restore
}

* Matching
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_1 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_1 contributor_name_1
joinby contributor_name_1 contributoroccupation using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_1 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_11occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_2 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_2 contributor_name_1
joinby contributor_name_1 contributoroccupation using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_1 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_21occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_1 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_1 contributor_name_2
joinby contributor_name_2 contributoroccupation using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_2 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_12occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_2 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_2 contributor_name_2
joinby contributor_name_2 contributoroccupation using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_2 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_22occ_`d'_`c'.dta", replace
}
}

forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_1 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_1 contributor_name_1
joinby contributor_name_1 contributoremployer using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_1 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_11emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_2 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_2 contributor_name_1
joinby contributor_name_1 contributoremployer using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_1 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_21emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_1 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_1 contributor_name_2
joinby contributor_name_2 contributoremployer using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_2 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_12emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_2 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_2 contributor_name_2
joinby contributor_name_2 contributoremployer using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_2 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1a"
save "${dir}/Data/Matched Datasets/matched_1a_22emp_`d'_`c'.dta", replace
}
}

* Append datasets of matches
use "${dir}/Data/Matched Datasets/matched_1a_11occ_1980_1.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1a_11occ_`d'_1.dta"
}
forvalues c=2(1)5 {
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1a_11occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_21occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_12occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_22occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_11emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_21emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_12emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1a_22emp_`d'_`c'.dta"
}
}
duplicates drop DirectorID transactionid bonicacid, force

save "${dir}/Data/Matched Datasets/matched_1a.dta", replace

foreach z in emp occ {
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1a_11`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1a_21`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1a_12`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1a_22`z'_`d'_`c'.dta"
}
}
}

erase "${dir}/Data/Boardex/matching_directors_1.dta"
erase "${dir}/Data/Boardex/matching_directors_2.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_1_`d'.dta"	
erase "${dir}/Data/Contributions/matching_contr_2_`d'.dta"			
}	
		
**********************************************************************************
*** STEP 1B: exploit information on midname for those who have it 			   *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!="" 
keep DirectorID CompanyID director_name_3 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_3.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!=""
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_4 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_4.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!=""
keep contributor_name_3 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_3_`d'.dta", replace		
restore
}
forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!=""
keep contributor_name_4 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_4_`d'.dta", replace		
restore
}


* Matching
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_3.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_3 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_3 contributor_name_3
joinby contributor_name_3 contributoroccupation using "${dir}/Data/Contributions/matching_contr_3_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_3 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_33occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_4.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_4 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_4 contributor_name_3
joinby contributor_name_3 contributoroccupation using "${dir}/Data/Contributions/matching_contr_3_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_3 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_43occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_3.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_3 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_3 contributor_name_4
joinby contributor_name_4 contributoroccupation using "${dir}/Data/Contributions/matching_contr_4_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_4 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_34occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_4.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_4 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_4 contributor_name_4
joinby contributor_name_4 contributoroccupation using "${dir}/Data/Contributions/matching_contr_4_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_4 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_44occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_3.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_3 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_3 contributor_name_3
joinby contributor_name_3 contributoremployer using "${dir}/Data/Contributions/matching_contr_3_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_3 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_33emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_4.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_4 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_4 contributor_name_3
joinby contributor_name_3 contributoremployer using "${dir}/Data/Contributions/matching_contr_3_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_3 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_43emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_3.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_3 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_3 contributor_name_4
joinby contributor_name_4 contributoremployer using "${dir}/Data/Contributions/matching_contr_4_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_4 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_34emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_4.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_4 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_4 contributor_name_4
joinby contributor_name_4 contributoremployer using "${dir}/Data/Contributions/matching_contr_4_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_4 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1b"
save "${dir}/Data/Matched Datasets/matched_1b_44emp_`d'_`c'.dta", replace
}
}

* Append datasets of matches
use "${dir}/Data/Matched Datasets/matched_1b_33occ_1980_1.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1b_33occ_`d'_1.dta"
}
forvalues c=2(1)5 {
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1b_33occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_43occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_34occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_44occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_33emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_43emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_34emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1b_44emp_`d'_`c'.dta"
}
}
duplicates drop DirectorID transactionid bonicacid, force

save "${dir}/Data/Matched Datasets/matched_1b.dta", replace

foreach z in emp occ {
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1b_33`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1b_43`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1b_34`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1b_44`z'_`d'_`c'.dta"
}
}
}

erase "${dir}/Data/Boardex/matching_directors_3.dta"
erase "${dir}/Data/Boardex/matching_directors_4.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_3_`d'.dta"	
erase "${dir}/Data/Contributions/matching_contr_4_`d'.dta"			
}

		
**********************************************************************************
*** STEP 1C: exploit information on suffix for those who have it 			   *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if suffix!=""
keep DirectorID CompanyID director_name_7 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_7.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if suffix!=""
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_8 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_8.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributorsuffix!=""
keep contributor_name_7 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_7_`d'.dta", replace		
restore
}


* Matching
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_7.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_7 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_7 contributor_name_7
joinby contributor_name_7 contributoroccupation using "${dir}/Data/Contributions/matching_contr_7_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_7 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1c"
save "${dir}/Data/Matched Datasets/matched_1c_77occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_8.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_8 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_8 contributor_name_7
joinby contributor_name_7 contributoroccupation using "${dir}/Data/Contributions/matching_contr_7_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_7 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1c"
save "${dir}/Data/Matched Datasets/matched_1c_78occ_`d'_`c'.dta", replace
}
}

forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_7.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_7 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_7 contributor_name_7
joinby contributor_name_7 contributoremployer using "${dir}/Data/Contributions/matching_contr_7_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_7 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1c"
save "${dir}/Data/Matched Datasets/matched_1c_77emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_8.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_8 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_8 contributor_name_7
joinby contributor_name_7 contributoremployer using "${dir}/Data/Contributions/matching_contr_7_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_7 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1c"
save "${dir}/Data/Matched Datasets/matched_1c_78emp_`d'_`c'.dta", replace
}
}


* Append datasets of matches
use "${dir}/Data/Matched Datasets/matched_1c_77occ_1980_1.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1c_77occ_`d'_1.dta"
}
forvalues c=2(1)5 {
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1c_77occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1c_78occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1c_77emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1c_78emp_`d'_`c'.dta"
}
}
duplicates drop DirectorID transactionid bonicacid, force

save "${dir}/Data/Matched Datasets/matched_1c.dta", replace

foreach z in emp occ {
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1c_77`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1c_78`z'_`d'_`c'.dta"
}
}
}

erase "${dir}/Data/Boardex/matching_directors_7.dta"
erase "${dir}/Data/Boardex/matching_directors_8.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_7_`d'.dta"	
}


**********************************************************************************
*** STEP 1D: matching on first and last for everybody						   *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep DirectorID CompanyID director_name_5 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_5.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_6 CompanyName1-CompanyName5
save "${dir}/Data/Boardex/matching_directors_6.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep contributor_name_5 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_5_`d'.dta", replace		
restore
}
forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!=""
keep contributor_name_6 transactionid bonicacid contributoroccupation contributoremployer
save "${dir}/Data/Contributions/matching_contr_6_`d'.dta", replace		
restore
}


* Matching
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_5.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_5 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_5 contributor_name_5
joinby contributor_name_5 contributoroccupation using "${dir}/Data/Contributions/matching_contr_5_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_5 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_55occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_6.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_6 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_6 contributor_name_5
joinby contributor_name_5 contributoroccupation using "${dir}/Data/Contributions/matching_contr_5_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_5 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_65occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_5.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_5 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_5 contributor_name_6
joinby contributor_name_6 contributoroccupation using "${dir}/Data/Contributions/matching_contr_6_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_6 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_56occ_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_6.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_6 CompanyName`c' 
rename CompanyName`c' contributoroccupation
rename director_name_6 contributor_name_6
joinby contributor_name_6 contributoroccupation using "${dir}/Data/Contributions/matching_contr_6_`d'.dta"
rename contributoroccupation CompanyName_matched
rename contributor_name_6 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_66occ_`d'_`c'.dta", replace
}
}


forvalues d=2016(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_5.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_5 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_5 contributor_name_5
joinby contributor_name_5 contributoremployer using "${dir}/Data/Contributions/matching_contr_5_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_5 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_55emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_6.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_6 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_6 contributor_name_5
joinby contributor_name_5 contributoremployer using "${dir}/Data/Contributions/matching_contr_5_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_5 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_65emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_5.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_5 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_5 contributor_name_6
joinby contributor_name_6 contributoremployer using "${dir}/Data/Contributions/matching_contr_6_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_6 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_56emp_`d'_`c'.dta", replace
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
use "${dir}/Data/Boardex/matching_directors_6.dta", clear
keep if CompanyName`c'!=""
keep DirectorID CompanyID director_name_6 CompanyName`c' 
rename CompanyName`c' contributoremployer
rename director_name_6 contributor_name_6
joinby contributor_name_6 contributoremployer using "${dir}/Data/Contributions/matching_contr_6_`d'.dta"
rename contributoremployer CompanyName_matched
rename contributor_name_6 DirectorName_matched
keep DirectorID CompanyID transactionid bonicacid CompanyName_matched DirectorName_matched
gen step_match="1d"
save "${dir}/Data/Matched Datasets/matched_1d_66emp_`d'_`c'.dta", replace
}
}


* Append datasets of matches
use "${dir}/Data/Matched Datasets/matched_1d_55occ_1980_1.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1d_55occ_`d'_1.dta"
}
forvalues c=2(1)5 {
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/matched_1d_55occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_65occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_56occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_66occ_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_55emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_65emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_56emp_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_1d_66emp_`d'_`c'.dta"
}
}
duplicates drop DirectorID transactionid bonicacid, force

save "${dir}/Data/Matched Datasets/matched_1d.dta", replace

foreach z in emp occ {
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1d_55`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1d_65`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1d_56`z'_`d'_`c'.dta"
}
}
forvalues d=1980(2)2018 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_1d_66`z'_`d'_`c'.dta"
}
}
}

erase "${dir}/Data/Boardex/matching_directors_5.dta"
erase "${dir}/Data/Boardex/matching_directors_6.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_5_`d'.dta"	
erase "${dir}/Data/Contributions/matching_contr_6_`d'.dta"			
}


**********************************************************************************
*** APPEND TOGETHER THE 4 DATASETS OF STEP 1								   *** 	
**********************************************************************************


use "${dir}/Data/Matched Datasets/matched_1a.dta", clear
append using "${dir}/Data/Matched Datasets/matched_1b.dta"
append using "${dir}/Data/Matched Datasets/matched_1c.dta"
append using "${dir}/Data/Matched Datasets/matched_1d.dta"

* If same donations matched multiple times, keep information on the highest step in which in was matched
gen temp=1 if step_match=="1a"
replace temp=2 if step_match=="1b"
replace temp=3 if step_match=="1c"
replace temp=4 if step_match=="1d"
drop step_match
rename temp step_match
label define labels 1 "1a" 2 "1b" 3 "1c" 4 "1d"
label values step_match labels 
destring step_match, replace
bys DirectorID transactionid bonicacid: egen min=min(step_match)
drop step_match
rename min step_match
duplicates drop DirectorID transactionid bonicacid, force

save "${dir}/Data/Matched Datasets/matched_STEP_1.dta", replace



***************************************************************************		
***** STEP 2: PERFECT MATCH BY NAME AND FUZZY MATCH BY COMPANY NAME   *****		
***************************************************************************

* Always exclude transaction_id already matched in Step 1	

preserve
use "${dir}/Data/Matched Datasets/matched_STEP_1.dta", clear
duplicates drop transactionid, force
keep transactionid
save "temp_already_found.dta", replace
restore


**********************************************************************************
*** STEP 2A: exploit information on midname and suffix for those who have them *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!="" & suffix!=""
keep DirectorID CompanyID director_name_1 CompanyName1-CompanyName5
rename director_name_1 DirectorName_matched
save "${dir}/Data/Boardex/matching_directors_1.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!="" & suffix!=""
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_2 CompanyName1-CompanyName5
rename director_name_2 DirectorName_matched
save "${dir}/Data/Boardex/matching_directors_2.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!="" & contributorsuffix!=""
keep contributor_name_1 transactionid bonicacid contributoremployer
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m
rename contributor_name_1 DirectorName_matched
save "${dir}/Data/Contributions/matching_contr_1_`d'.dta", replace		
restore
}
forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!="" & contributorsuffix!=""
keep contributor_name_2 transactionid bonicacid contributoremployer
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m
rename contributor_name_2 DirectorName_matched
save "${dir}/Data/Contributions/matching_contr_2_`d'.dta", replace		
restore
}

* Matching
forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2a"
save "${dir}/Data/Matched Datasets/master_2a_11_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2a_11_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2a"
save "${dir}/Data/Matched Datasets/master_2a_21_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2a_21_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2a"
save "${dir}/Data/Matched Datasets/master_2a_12_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2a_12_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName*
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2a"
save "${dir}/Data/Matched Datasets/master_2a_22_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2a_22_`d'.dta", replace
}

* Append datasets of matches
use "${dir}/Data/Matched Datasets/master_2a_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2a_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2a_21_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2a_12_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2a_22_`d'.dta"
}
duplicates drop DirectorID transactionid, force
gen double id_master=_n
save "${dir}/Data/Matched Datasets/master_2a.dta", replace

use "${dir}/Data/Matched Datasets/using_2a_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2a_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2a_21_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2a_12_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2a_22_`d'.dta"
}
duplicates drop transactionid, force
* Save using data (only if there is the name of employer)
keep if contributoremployer!=""
gen double id_using=_n
save "${dir}/Data/Matched Datasets/using_2a.dta", replace


forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2a_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2a_21_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2a_12_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2a_22_`d'.dta"
}

forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2a_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2a_21_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2a_12_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2a_22_`d'.dta"
}

erase "${dir}/Data/Boardex/matching_directors_1.dta"
erase "${dir}/Data/Boardex/matching_directors_2.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_1_`d'.dta"	
erase "${dir}/Data/Contributions/matching_contr_2_`d'.dta"			
}	




**********************************************************************************
*** STEP 2B: exploit information on midname for those who have it			   *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!=""
keep DirectorID CompanyID director_name_3 CompanyName1-CompanyName5
rename director_name_3 DirectorName_matched
save "${dir}/Data/Boardex/matching_directors_1.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if midname!=""
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_4 CompanyName1-CompanyName5
rename director_name_4 DirectorName_matched
save "${dir}/Data/Boardex/matching_directors_2.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!="" 
keep contributor_name_3 transactionid bonicacid contributoremployer
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m
rename contributor_name_3 DirectorName_matched 
save "${dir}/Data/Contributions/matching_contr_1_`d'.dta", replace		
restore
}
forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributormname!=""
keep contributor_name_4 transactionid bonicacid contributoremployer
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m
rename contributor_name_4 DirectorName_matched 
save "${dir}/Data/Contributions/matching_contr_2_`d'.dta", replace		
restore
}

* Matching
forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2b"
save "${dir}/Data/Matched Datasets/master_2b_11_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2b_11_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2b"
save "${dir}/Data/Matched Datasets/master_2b_21_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2b_21_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2b"
save "${dir}/Data/Matched Datasets/master_2b_12_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2b_12_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName*
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2b"
save "${dir}/Data/Matched Datasets/master_2b_22_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2b_22_`d'.dta", replace
}

* Append datasets of matches
use "${dir}/Data/Matched Datasets/master_2b_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2b_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2b_21_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2b_12_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2b_22_`d'.dta"
}
duplicates drop DirectorID transactionid, force
save "${dir}/Data/Matched Datasets/master_2b.dta", replace

use "${dir}/Data/Matched Datasets/using_2b_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2b_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2b_21_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2b_12_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2b_22_`d'.dta"
}
duplicates drop transactionid, force
* Save using and master data only if there is the name of employer
keep if contributoremployer!=""
gen double id_using=_n
save "${dir}/Data/Matched Datasets/using_2b.dta", replace

use "${dir}/Data/Matched Datasets/master_2b.dta", clear	
merge m:1 DirectorID transactionid using "${dir}/Data/Matched Datasets/using_2b.dta"
keep if _m==3
drop _m contributoremployer id_using bonicacid
gen double id_master=_n
save "${dir}/Data/Matched Datasets/master_2b.dta", replace


forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2b_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2b_21_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2b_12_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2b_22_`d'.dta"
}

forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2b_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2b_21_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2b_12_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2b_22_`d'.dta"
}

erase "${dir}/Data/Boardex/matching_directors_1.dta"
erase "${dir}/Data/Boardex/matching_directors_2.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_1_`d'.dta"	
erase "${dir}/Data/Contributions/matching_contr_2_`d'.dta"			
}	


**********************************************************************************
*** STEP 2C: exploit information on suffix for those who have it			   *** 	
**********************************************************************************

* Clean data	
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if suffix!=""
keep DirectorID CompanyID director_name_7 CompanyName1-CompanyName5
rename director_name_7 DirectorName_matched
save "${dir}/Data/Boardex/matching_directors_1.dta"	, replace	
restore
preserve		
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if suffix!=""
keep if (nickname!="" | nickname_midname!="")
keep DirectorID CompanyID director_name_8 CompanyName1-CompanyName5
rename director_name_8 DirectorName_matched
save "${dir}/Data/Boardex/matching_directors_2.dta"	, replace	
restore

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributorsuffix!="" 
keep contributor_name_7 transactionid bonicacid contributoremployer
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m
rename contributor_name_7 DirectorName_matched 
save "${dir}/Data/Contributions/matching_contr_1_`d'.dta", replace		
restore
}

* Matching
forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2c"
save "${dir}/Data/Matched Datasets/master_2c_11_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2c_11_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
keep DirectorID CompanyID DirectorName_matched CompanyName* 
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
preserve
keep DirectorID CompanyID transactionid CompanyName* DirectorName_matched
gen step_match="2c"
save "${dir}/Data/Matched Datasets/master_2c_21_`d'.dta", replace
restore
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2c_21_`d'.dta", replace
}

* Append datasets of matches
use "${dir}/Data/Matched Datasets/master_2c_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2c_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2c_21_`d'.dta"
}
duplicates drop DirectorID transactionid, force
gen double id_master=_n
save "${dir}/Data/Matched Datasets/master_2c.dta", replace

use "${dir}/Data/Matched Datasets/using_2c_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2c_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2c_21_`d'.dta"
}
* Save using data (only if there is the name of employer)
keep if contributoremployer!=""
gen double id_using=_n
save "${dir}/Data/Matched Datasets/using_2c.dta", replace


forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2c_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2c_21_`d'.dta"
}

forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2c_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2c_21_`d'.dta"
}

erase "${dir}/Data/Boardex/matching_directors_1.dta"
erase "${dir}/Data/Boardex/matching_directors_2.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_1_`d'.dta"	
}	


**********************************************************************************
*** STEP 2D: matching on first and last for everybody			   			   *** 	
**********************************************************************************

* Clean data			
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep director_name_5 DirectorID
rename director_name_5 DirectorName_matched
duplicates drop DirectorID, force
save "${dir}/Data/Boardex/matching_directors_1.dta"	, replace			
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep if (nickname!="" | nickname_midname!="")
keep director_name_6 DirectorID
rename director_name_6 DirectorName_matched
duplicates drop DirectorID, force
save "${dir}/Data/Boardex/matching_directors_2.dta"	, replace	

forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributoremployer!="" 
keep contributor_name_5 transactionid contributoremployer bonicacid
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m 
rename contributor_name_5 DirectorName_matched 
save "${dir}/Data/Contributions/matching_contr_1_`d'.dta", replace		
restore
}
forvalues d=1980(2)2018 {
preserve	
use "${dir}/Data/Contributions/contributions_tomerge_`d'.dta", clear
keep if contributoremployer!="" 
keep if contributormname!=""
keep contributor_name_6 transactionid contributoremployer bonicacid
merge 1:1 transactionid using "temp_already_found.dta"
keep if _m==1
drop _m 
rename contributor_name_6 DirectorName_matched 
save "${dir}/Data/Contributions/matching_contr_2_`d'.dta", replace		
restore
}

* Matching
forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2d_11_`d'.dta", replace
keep DirectorID transactionid 
save "${dir}/Data/Matched Datasets/master_2d_11_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_1_`d'.dta"
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2d_21_`d'.dta", replace
keep DirectorID transactionid 
save "${dir}/Data/Matched Datasets/master_2d_21_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_1.dta", clear
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2d_12_`d'.dta", replace
keep DirectorID transactionid 
save "${dir}/Data/Matched Datasets/master_2d_12_`d'.dta", replace
}

forvalues d=1980(2)2018 {
use "${dir}/Data/Boardex/matching_directors_2.dta", clear
joinby DirectorName_matched using "${dir}/Data/Contributions/matching_contr_2_`d'.dta"
keep DirectorID transactionid contributoremployer bonicacid
save "${dir}/Data/Matched Datasets/using_2d_22_`d'.dta", replace
keep DirectorID transactionid 
save "${dir}/Data/Matched Datasets/master_2d_22_`d'.dta", replace 
}

* Append datasets of matches
use "${dir}/Data/Matched Datasets/master_2d_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2d_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2d_21_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2d_12_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/master_2d_22_`d'.dta"
}
duplicates drop DirectorID transactionid, force
save "${dir}/Data/Matched Datasets/master_2d.dta", replace

use "${dir}/Data/Matched Datasets/using_2d_11_1980.dta", clear
forvalues d=1982(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2d_11_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2d_21_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2d_12_`d'.dta"
}
forvalues d=1980(2)2018 {
append using "${dir}/Data/Matched Datasets/using_2d_22_`d'.dta"
}
duplicates drop DirectorID transactionid, force
save "${dir}/Data/Matched Datasets/using_2d.dta", replace
keep if contributoremployer!="retired"
keep if contributoremployer!="self employed"
keep if contributoremployer!="selfemployed"
duplicates drop contributoremployer DirectorID bonicacid, force
gen double id_using=_n
save "${dir}/Data/Matched Datasets/using_2d_short.dta", replace

use "${dir}/Data/Matched Datasets/master_2d.dta", clear
merge 1:1 DirectorID transactionid using "${dir}/Data/Matched Datasets/using_2d_short.dta"
drop contributoremployer bonicacid
keep if _m==3
drop _m
save "${dir}/Data/Matched Datasets/master_2d_short.dta", replace

* Keep useful information for reclink step D	
use "${dir}/Data/Boardex/Directors_to_Match_Complete_History.dta", clear
keep DirectorID CompanyID CompanyName*
joinby DirectorID using "${dir}/Data/Matched Datasets/master_2d_short.dta"
gen double id_master=_n
save "${dir}/Data/Matched Datasets/master_2d_short_tomerge.dta", replace

forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2d_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2d_21_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2d_12_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/master_2d_22_`d'.dta"
}

forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2d_11_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2d_21_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2d_12_`d'.dta"
}
forvalues d=1980(2)2018 {
erase "${dir}/Data/Matched Datasets/using_2d_22_`d'.dta"
}

erase "${dir}/Data/Boardex/matching_directors_1.dta"
erase "${dir}/Data/Boardex/matching_directors_2.dta"
forvalues d=1980(2)2018 {
erase "${dir}/Data/Contributions/matching_contr_1_`d'.dta"	
erase "${dir}/Data/Contributions/matching_contr_2_`d'.dta"			
}	

erase "temp_already_found.dta"

**********************************************************************************
*** Reclink all of the steps									   			   *** 	
**********************************************************************************

*** STEP A
forvalues c=1(1)5 {
use "${dir}/Data/Matched Datasets/master_2a.dta", clear	
keep if CompanyName`c'!=""
keep DirectorID CompanyID transactionid CompanyName`c' DirectorName_matched id_master
rename CompanyName`c' contributoremployer
reclink transactionid DirectorID contributoremployer using "${dir}/Data/Matched Datasets/using_2a.dta", gen(quality_match) idusing(id_using) idmaster(id_master) required(transactionid DirectorID)
keep if _m==3
drop _m
save "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check_`c'.dta"		
}
use "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check_1.dta"
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check_`c'.dta"
}
duplicates drop DirectorID transactionid, force
save "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check.dta", replace

forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check_`c'.dta"		
}

*** STEP B
use "${dir}/Data/Matched Datasets/master_2b.dta", clear	
sort transactionid DirectorID
egen double n=group(transactionid DirectorID)
gen group=1 if (n>=1 & n<200000)
replace group=2 if (n>=200000 & n<400000)
replace group=3 if (n>=400000 & n<600000)
replace group=4 if (n>=600000 & n<800000)
replace group=5 if (n>=800000 & n<1000000)
replace group=6 if (n>=1000000 & n<1200000)
replace group=7 if (n>=1200000 & n<1400000)
replace group=8 if (n>=1400000 & n<1600000)
replace group=9 if (n>=1600000 & n<1800000)
replace group=10 if (n>=1800000 & n<2000000)
replace group=11 if (n>=2000000 & n<2200000)
replace group=12 if (n>=2200000 & n<2400000)
replace group=13 if (n>=2400000 & n<2600000)
replace group=14 if (n>=2600000)
drop n
save "${dir}/Data/Matched Datasets/temp_2b.dta", replace

use "${dir}/Data/Matched Datasets/using_2b.dta", clear	
sort transactionid DirectorID
egen double n=group(transactionid DirectorID)
gen group=1 if (n>=1 & n<200000)
replace group=2 if (n>=200000 & n<400000)
replace group=3 if (n>=400000 & n<600000)
replace group=4 if (n>=600000 & n<800000)
replace group=5 if (n>=800000 & n<1000000)
replace group=6 if (n>=1000000 & n<1200000)
replace group=7 if (n>=1200000 & n<1400000)
replace group=8 if (n>=1400000 & n<1600000)
replace group=9 if (n>=1600000 & n<1800000)
replace group=10 if (n>=1800000 & n<2000000)
replace group=11 if (n>=2000000 & n<2200000)
replace group=12 if (n>=2200000 & n<2400000)
replace group=13 if (n>=2400000 & n<2600000)
replace group=14 if (n>=2600000)
drop n
save "${dir}/Data/Matched Datasets/temp_2b_using.dta", replace

forvalues i=1(1)14 {
use "${dir}/Data/Matched Datasets/temp_2b_using.dta", clear
keep if group==`i'
drop id_using
gen double id_using=_n
save "${dir}/Data/Matched Datasets/temp_2b_using_`i'.dta", replace
use "${dir}/Data/Matched Datasets/temp_2b.dta", clear
keep if group==`i'
drop id_master
gen double id_master=_n
forvalues c=1(1)5 {
preserve
keep if CompanyName`c'!=""
keep DirectorID CompanyID transactionid CompanyName`c' id_master
rename CompanyName`c' contributoremployer
reclink transactionid DirectorID contributoremployer using "${dir}/Data/Matched Datasets/temp_2b_using_`i'.dta", gen(quality_match) idusing(id_using) idmaster(id_master) required(transactionid DirectorID)
keep if _m==3
drop _m
save "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_`c'_`i'.dta", replace
restore
}
erase "${dir}/Data/Matched Datasets/temp_2b_using_`i'.dta"
}
use "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_1_1.dta", clear
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_2_1.dta"
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_3_1.dta"
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_4_1.dta"
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_5_1.dta"
forvalues i=2(1)14 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_`c'_`i'.dta"
}
}
duplicates drop DirectorID transactionid, force
drop group
save "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check.dta", replace
forvalues i=1(1)14 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check_`c'_`i'.dta"
}
}
erase "${dir}/Data/Matched Datasets/temp_2b.dta"
erase "${dir}/Data/Matched Datasets/temp_2b_using.dta"


* STEP C
forvalues c=1(1)5 {
use "${dir}/Data/Matched Datasets/master_2c.dta", clear	
keep if CompanyName`c'!=""
keep DirectorID CompanyID transactionid CompanyName`c' DirectorName_matched id_master
rename CompanyName`c' contributoremployer
reclink transactionid DirectorID contributoremployer using "${dir}/Data/Matched Datasets/using_2c.dta", gen(quality_match) idusing(id_using) idmaster(id_master) required(transactionid DirectorID)
keep if _m==3
drop _m
save "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check_`c'.dta"		
}
use "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check_1.dta"
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check_`c'.dta"
}
duplicates drop DirectorID transactionid, force
save "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check.dta", replace

forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check_`c'.dta"		
}

* STEP D
use "${dir}/Data/Matched Datasets/master_2d_short_tomerge.dta", clear	
drop CompanyName_Orig
sort transactionid DirectorID
egen double n=group(transactionid DirectorID)
gen group=1 if (n>=1 & n<50000)
forvalues x=2(1)474 {
replace group=`x' if (n>=50000*(`x'-1) & n<50000*`x')
}
replace group=475 if (n>=23700000)
drop n
save "${dir}/Data/Matched Datasets/temp_2d.dta", replace

use "${dir}/Data/Matched Datasets/using_2d_short.dta", clear
sort transactionid DirectorID
egen double n=group(transactionid DirectorID)
gen group=1 if (n>=1 & n<50000)
forvalues x=2(1)474 {
replace group=`x' if (n>=50000*(`x'-1) & n<50000*`x')
}
replace group=475 if (n>=23700000)
drop n
save "${dir}/Data/Matched Datasets/temp_2d_using.dta", replace


forvalues i=1(1)475 {
use "${dir}/Data/Matched Datasets/temp_2d_using.dta", clear
keep if group==`i'
drop id_using
gen double id_using=_n
save "${dir}/Data/Matched Datasets/temp_2d_using_`i'.dta", replace
use "${dir}/Data/Matched Datasets/temp_2d.dta", clear
keep if group==`i'
drop id_master
gen double id_master=_n
forvalues c=1(1)5 {
preserve
keep if CompanyName`c'!=""
keep DirectorID CompanyID transactionid CompanyName`c' id_master
rename CompanyName`c' contributoremployer
reclink transactionid DirectorID contributoremployer using "${dir}/Data/Matched Datasets/temp_2d_using_`i'.dta", gen(quality_match) idusing(id_using) idmaster(id_master) required(transactionid DirectorID)
keep if _m==3
drop _m
save "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_`c'_`i'.dta"
restore
}
erase "${dir}/Data/Matched Datasets/temp_2d_using_`i'.dta"
}


use "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_1_1.dta", clear
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_2_1.dta"
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_3_1.dta"
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_4_1.dta"
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_5_1.dta"
forvalues i=2(1)475 {
forvalues c=1(1)5 {
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_`c'_`i'.dta"
}
}
duplicates drop DirectorID transactionid, force
drop group
save "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check.dta", replace

forvalues i=1(1)475 {
forvalues c=1(1)5 {
erase "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check_`c'_`i'.dta"
}
}
erase "${dir}/Data/Matched Datasets/temp_2d.dta"
erase "${dir}/Data/Matched Datasets/using_2d_short.dta"
erase "${dir}/Data/Matched Datasets/temp_2d_using.dta"
erase "${dir}/Data/Matched Datasets/master_2d_short_tomerge.dta"
erase "${dir}/Data/Matched Datasets/master_2d_short.dta"


**********************************************************************************
*** Append All Reclinked Steps Together							   			   *** 	
**********************************************************************************

use "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check.dta", clear
gen step_match="2a"
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check.dta"
replace step_match="2b" if step_match==""
append using "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check.dta"
replace step_match="2c" if step_match==""
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check.dta"
replace step_match="2d" if step_match==""

drop id_master id_using DirectorName_matched

* Keep only one observation for each director-contributoremployer-Ucontributoremployer
duplicates drop contributoremployer Ucontributoremployer DirectorID, force
sort contributoremployer Ucontributoremployer DirectorID
gen observation_id=_n
save "${dir}/Data/Matched Datasets/matched_STEP_2.dta", replace


* 1) If quality match > 0.995, then it's a match
preserve
keep if quality_match>0.9951
keep DirectorID CompanyID transactionid step_match bonicacid contributoremployer Ucontributoremployer

* If same donations matched multiple times, keep information on the highest step in which it was matched
gen temp=5 if step_match=="2a"
replace temp=6 if step_match=="2b"
replace temp=7 if step_match=="2c"
replace temp=8 if step_match=="2d"
drop step_match
rename temp step_match
label define labels 5 "2a" 6 "2b" 7 "2c" 8 "2d"
label values step_match labels 
destring step_match, replace
bys DirectorID transactionid bonicacid: egen min=min(step_match)
drop step_match
rename min step_match

save "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_1.dta", replace
restore


* 2) If quality match < 0.75, then NOT a match


* 3) Other cases, need to adjudicate:
count if quality_match <=0.9951 & quality_match >=0.75
* How many? 130,796 cases to be checked
* Outsheet these cases, with also info on director's name:
preserve
use "${dir}/Data/Boardex/Directors_to_Match.dta", clear
keep DirectorID IndividualName
duplicates drop DirectorID, force
tempfile temp
save "`temp'" , replace
restore 
preserve
keep if quality_match <=0.9951 & quality_match >=0.75
keep quality_match contributoremployer Ucontributoremployer DirectorID
merge m:1 DirectorID using "`temp'"
drop if _m==2
drop _m
gen MATCH=.
gsort -quality_match
order contributoremployer Ucontributoremployer MATCH quality_match IndividualName DirectorID
export excel contributoremployer Ucontributoremployer MATCH quality_match IndividualName DirectorID using "${dir}/Data/Matched Datasets/STEP_2_TOCHECK.xlsx", replace first(var)
restore

* Checked file is "/Matched Datasets/STEP_2_CHECKED.dta"

keep if quality_match <=0.9951 & quality_match >=0.75
merge 1:1 contributoremployer Ucontributoremployer DirectorID using "${dir}/Data/Matched Datasets/STEP_2_CHECKED.dta"
* Note: 1 observation does not match because of a space in the string that appeared in the excel file. Assign manually (MATCH=1)
replace MATCH=1 if _m==1
drop if _m==2
drop _m
keep if MATCH==1
keep DirectorID CompanyID transactionid step_match bonicacid contributoremployer Ucontributoremployer

* If same donations matched multiple times, keep information on the highest step in which it was matched
gen temp=5 if step_match=="2a"
replace temp=6 if step_match=="2b"
replace temp=7 if step_match=="2c"
replace temp=8 if step_match=="2d"
drop step_match
rename temp step_match
label define labels 5 "2a" 6 "2b" 7 "2c" 8 "2d"
label values step_match labels 
destring step_match, replace
bys DirectorID transactionid bonicacid: egen min=min(step_match)
drop step_match
rename min step_match

save "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_2.dta", replace

* Get all Bonica_ID associated with a combination of director - contributor employer - U contributor employer
use "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_1.dta", clear
append using "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_2.dta"
bys DirectorID transactionid bonicacid: egen min=min(step_match)
drop step_match
rename min step_match
save "${dir}/Data/Matched Datasets/temp.dta", replace

use "${dir}/Data/Matched Datasets/matched_STEP_2a_to_check.dta", clear
gen step_match="2a"
append using "${dir}/Data/Matched Datasets/matched_STEP_2b_to_check.dta"
replace step_match="2b" if step_match==""
append using "${dir}/Data/Matched Datasets/matched_STEP_2c_to_check.dta"
replace step_match="2c" if step_match==""
append using "${dir}/Data/Matched Datasets/matched_STEP_2d_to_check.dta"
replace step_match="2d" if step_match==""

drop id_master id_using DirectorName_matched step_match

merge m:1 DirectorID transactionid bonicacid contributoremployer Ucontributoremployer using "${dir}/Data/Matched Datasets/temp.dta"

egen group=group(DirectorID contributoremployer Ucontributoremployer)
bys group: egen max=max(_m)
bys group: egen min=min(_m)
bys group: egen min_step=min(step_match)

replace _m=max if max==3 & _m==1
replace step_match=min_step 

keep if _m==3
keep DirectorID CompanyID transactionid bonicacid step_match

* Keep all directorID BonicaCID combinations and keep the minimum step in which the 2 were matched
bys DirectorID bonicacid: egen min=min(step_match)
drop step_match
rename min step_match
duplicates drop DirectorID bonicacid, force
keep DirectorID bonicacid step_match
save "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_FINAL.dta"

erase "${dir}/Data/Matched Datasets/temp.dta"

**********************************************************************************
*** Append All Matches together - From STEP 1 AND 2				   			   *** 	
**********************************************************************************

* Take Step 1, keep all directorID BonicaCID combinations and keep the minimum step in which the 2 were matched
use "${dir}/Data/Matched Datasets/matched_STEP_1.dta", clear
bys DirectorID bonicacid: egen min=min(step_match)
drop step_match
rename min step_match
duplicates drop DirectorID bonicacid, force
keep DirectorID bonicacid step_match

* Append to Step 2, and eliminate duplicates of directorID BonicaCID (matched both in step 1 and 2) keeping the minimum step in which the 2 were matched
append using "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_FINAL.dta"
bys DirectorID bonicacid: egen min=min(step_match)
drop step_match
rename min step_match
duplicates drop DirectorID bonicacid, force

* How many bonicaID matched to multiple DirectorID
preserve
bys bonicacid: gen N=_N
duplicates drop bonicacid, force
tab N
restore
/*
          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    268,948       99.04       99.04
          2 |      2,464        0.91       99.95
          3 |        116        0.04       99.99
          4 |         23        0.01      100.00
          5 |          3        0.00      100.00
          6 |          3        0.00      100.00
          8 |          2        0.00      100.00
         11 |          1        0.00      100.00
------------+-----------------------------------
      Total |    271,560      100.00

* 99.04% of all Bonica ID donors are matched to a unique Director ID

*/


* How many after keeping only the bonicaID-DirectorID pairs that were matched at an earlier step
bys bonicacid: egen min=min(step_match)
keep if min==step_match
preserve
bys bonicacid: gen N=_N
duplicates drop bonicacid, force
tab N
restore
/*
          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    270,329       99.55       99.55
          2 |      1,165        0.43       99.98
          3 |         60        0.02      100.00
          4 |          5        0.00      100.00
          5 |          1        0.00      100.00
------------+-----------------------------------
      Total |    271,560      100.00

99.55% after deciding the multiple matching cases based on which ones were matched an an earlier stage of matching
The cases of duplicates left are those in which a given Bonica ID was matched to 2 or 3 or 4 Director IDs
in the SAME stage of matching

*/
drop min

* These few cases are bonicaID-DirectorID pairs that were matched in the same step of matching
* Drop at random
sort bonicacid DirectorID 
by bonicacid: gen N=_n
sort bonicacid N
drop if N!=1
drop N

format bonicacid %20.0g

save "${dir}/Data/Matched Datasets/DirectorID_BonicaID_MATCHES.dta", replace

erase "${dir}/Data/Matched Datasets/matched_STEP_2_MATCH_FINAL.dta"



		*********************************************************************************************************
		*********************************************************************************************************
		*** STEP 6:  Take all contributions of the Directors and Generate Dataset of Directors' Contributions ***
		*********************************************************************************************************
		*********************************************************************************************************

* Keep from contributions data only the contributions of directors and the variables of interest
forvalues d=1980(2)2018 {
cd "${dir}/Data/DIME_V3"
insheet using contribDB_`d'.csv, clear
keep if contributortype=="I" // only keep individual contributions
keep cycle transactionid transactiontype amount date bonicacid recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype
format bonicacid %20.0g
merge m:1 bonicacid using "${dir}/Data/Matched Datasets/DirectorID_BonicaID_MATCHES.dta"
keep if _m==3
drop _m
cd "${dir}/Data/Contributions"
save "temp_`d'.dta", replace
}

use "temp_1980.dta", clear
forvalues d=1982(2)2018 {
append using "temp_`d'.dta"
}

forvalues d=1980(2)2018 {
erase "temp_`d'.dta"
}

save "${dir}/Data/Matched Datasets/Contributions_Matched.dta", replace


		***************************************
		***************************************
		*** merge with candidates' database ***
		***************************************
		***************************************

*** merge by bonicarid to get info on icpsr2 ***
insheet using "${dir}/Data/DIME_V3/dime_recipients_all_1979_2018.csv", clear
keep bonicarid icpsr2 icpsr cycle beforeswitchicpsr afterswitchicpsr name

/*
REPLACE THESE MISTAKES IN BONICA DATA: different icpsr2 and bonicarid assigned to the same person
*/
replace icpsr2="PA143065" if bonicarid=="cand142752"
replace bonicarid="cand101855" if bonicarid=="cand142752"
/*
REPLACE THESE MISTAKES IN BONICA DATA FOR CANDIDATES: same bonicarid assigned to 2 different candidates
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand1383_2" if bonicarid=="cand1383" & icpsr2=="10788"
replace bonicarid="cand767_2" if bonicarid=="cand767" & icpsr2=="10719"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & icpsr2=="21344"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="maloney, sean patrick"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="MALONEY, SEAN PATRICK"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="friends of maloney"
replace icpsr2="21344" if bonicarid=="cand1318_2"
replace icpsr2=icpsr if bonicarid=="cand1267" & name=="BROWN, ALVIN"
replace icpsr2=icpsr if bonicarid=="cand1267" & name=="BROWN, PRINCE"
replace icpsr2=icpsr if bonicarid=="cand1267" & name=="brown, prince"
replace bonicarid="cand1267_2" if bonicarid=="cand1267" & name=="BROWN, ALVIN"
replace bonicarid="cand1267_3" if bonicarid=="cand1267" & name=="BROWN, PRINCE"
replace bonicarid="cand1267_3" if bonicarid=="cand1267" & name=="brown, prince"
replace icpsr2=icpsr if bonicarid=="cand43796" & name=="meng, matt"
replace bonicarid="cand43796_2" if bonicarid=="cand43796" & name=="meng, matt"
replace icpsr2=icpsr if bonicarid=="cand3314" & name=="santana, alberto l"
replace icpsr2=icpsr if bonicarid=="cand3314" & name=="santana, alberto"
replace bonicarid="cand3314_2" if bonicarid=="cand3314" & name=="santana, alberto l"
replace bonicarid="cand3314_2" if bonicarid=="cand3314" & name=="santana, alberto"
replace icpsr2="H4NY12029" if name=="VELAZQUEZ, PEDRO L" & bonicarid=="H4NY12029"
replace bonicarid="cand131777" if name=="VELAZQUEZ, PEDRO L" & bonicarid=="H4NY12029"
replace icpsr2="41700" if bonicarid=="cand131015" & name=="MASTO, CATHERINE CORTEZ"
replace bonicarid="cand83453" if bonicarid=="cand131015" & name=="MASTO, CATHERINE CORTEZ"
replace icpsr2="H8WY00072" if bonicarid=="cand105249" & name=="SHARRATT, BRYAN EDWARDS"
replace bonicarid="cand57085" if bonicarid=="cand105249" & name=="SHARRATT, BRYAN EDWARDS"
replace icpsr2=icpsr if bonicarid=="cand181237" & name=="bartlett, dewey follett"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & name=="bartlett, dewey follett"
replace icpsr2=icpsr if bonicarid=="cand181237" & name=="bartlett, dewey f jr"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & name=="bartlett, dewey f jr"
replace icpsr2=icpsr if bonicarid=="cand241" & name=="early, joseph d jr"
replace bonicarid="cand241_2" if bonicarid=="cand241" & name=="early, joseph d jr"
replace icpsr2=icpsr if bonicarid=="cand271" & name=="MCDONALD, KATHRYN JACKSON"
replace bonicarid="cand271_2" if bonicarid=="cand271" & name=="MCDONALD, KATHRYN JACKSON"
replace icpsr2=icpsr if bonicarid=="cand323" & name=="corcoran, thomas e"
replace bonicarid="cand323_2" if bonicarid=="cand323" & name=="corcoran, thomas e"
replace icpsr2=icpsr if bonicarid=="cand323" & name=="CORCORAN, THOMAS E"
replace bonicarid="cand323_2" if bonicarid=="cand323" & name=="CORCORAN, THOMAS E"
replace icpsr2=icpsr if bonicarid=="cand393" & name=="ATKINSON, FREDERICK WAYNE"
replace bonicarid="cand393_2" if bonicarid=="cand393" & name=="ATKINSON, FREDERICK WAYNE"
replace icpsr2="H0FL18017" if bonicarid=="cand507" & name=="EVANS, CHARLES RAY"
replace bonicarid="cand36009" if bonicarid=="cand507" & name=="EVANS, CHARLES RAY"
replace icpsr2=icpsr if bonicarid=="cand551" & name=="smith, dennis c w"
replace bonicarid="cand551_2" if bonicarid=="cand551" & name=="smith, dennis c w"
replace icpsr2=icpsr if bonicarid=="cand551" & name=="SMITH, DENNIS C W"
replace bonicarid="cand551_2" if bonicarid=="cand551" & name=="SMITH, DENNIS C W"
replace icpsr2=icpsr if bonicarid=="cand839" & name=="lynch, stephen a"
replace bonicarid="cand839_2" if bonicarid=="cand839" & name=="lynch, stephen a"
replace icpsr2="CA_CP1356482" if bonicarid=="cand1082" & name=="chu, kansen"
replace bonicarid="cand9996708" if bonicarid=="cand1082" & name=="chu, kansen"
replace icpsr2=icpsr if bonicarid=="cand1220" & name=="ANDREWS, CAMILLE SPINELLO"
replace bonicarid="cand1220_2" if bonicarid=="cand1220" & name=="ANDREWS, CAMILLE SPINELLO"
replace icpsr2=icpsr if bonicarid=="cand1407" & name=="stockman, draft"
replace bonicarid="cand1407_2" if bonicarid=="cand1407" & name=="stockman, draft"
replace icpsr2=icpsr if bonicarid=="cand1521" & name=="fletcher, wynetta wendy"
replace bonicarid="cand1521_2" if bonicarid=="cand1521" & name=="fletcher, wynetta wendy"
replace icpsr2=icpsr if bonicarid=="cand1620" & name=="faircloth, resson oliver jr"
replace bonicarid="cand1620_2" if bonicarid=="cand1620" & name=="faircloth, resson oliver jr"
replace icpsr2=icpsr if bonicarid=="cand3303" & name=="kearns, don"
replace bonicarid="cand3303_2" if bonicarid=="cand3303" & name=="kearns, don"

/*
REPLACE THESE MISTAKES IN BONICA DATA: different bonicarid assigned to the same person
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand44995" if bonicarid=="cand140862"
replace bonicarid="cand160674" if icpsr2=="21525" & cycle==2020

duplicates drop bonicarid, force
drop name cycle
save "temp_recipient.dta", replace



*** merge by bonicarid to get info on bonicacid ***
* NOTE: before importing dime_recipients_all_1979_2018.csv you need to open it and change the format of bonicacid to "number" (with 0 decimal places) otherwise it imports bonicacid as double (with loss of information)
insheet using "${dir}/Data/DIME_V3/dime_recipients_all_1979_2018.csv", clear
keep bonicarid bonicacid name cycle icpsr2
format bonicacid %20.0g

/*
CHANGE bonicarid AS DONE ABOVE
*/
replace bonicarid="cand101855" if bonicarid=="cand142752"
replace bonicarid="cand1383_2" if bonicarid=="cand1383" & icpsr2=="10788"
replace bonicarid="cand767_2" if bonicarid=="cand767" & icpsr2=="10719"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & icpsr2=="21344"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="maloney, sean patrick"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="MALONEY, SEAN PATRICK"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="friends of maloney"
replace bonicarid="cand1267_2" if bonicarid=="cand1267" & name=="BROWN, ALVIN"
replace bonicarid="cand1267_3" if bonicarid=="cand1267" & name=="BROWN, PRINCE"
replace bonicarid="cand1267_3" if bonicarid=="cand1267" & name=="brown, prince"
replace bonicarid="cand43796_2" if bonicarid=="cand43796" & name=="meng, matt"
replace bonicarid="cand3314_2" if bonicarid=="cand3314" & name=="santana, alberto l"
replace bonicarid="cand3314_2" if bonicarid=="cand3314" & name=="santana, alberto"
replace bonicarid="cand131777" if name=="VELAZQUEZ, PEDRO L" & bonicarid=="H4NY12029"
replace bonicarid="cand83453" if bonicarid=="cand131015" & name=="MASTO, CATHERINE CORTEZ"
replace bonicarid="cand57085" if bonicarid=="cand105249" & name=="SHARRATT, BRYAN EDWARDS"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & name=="bartlett, dewey follett"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & name=="bartlett, dewey f jr"
replace bonicarid="cand241_2" if bonicarid=="cand241" & name=="early, joseph d jr"
replace bonicarid="cand271_2" if bonicarid=="cand271" & name=="MCDONALD, KATHRYN JACKSON"
replace bonicarid="cand323_2" if bonicarid=="cand323" & name=="corcoran, thomas e"
replace bonicarid="cand323_2" if bonicarid=="cand323" & name=="CORCORAN, THOMAS E"
replace bonicarid="cand393_2" if bonicarid=="cand393" & name=="ATKINSON, FREDERICK WAYNE"
replace bonicarid="cand36009" if bonicarid=="cand507" & name=="EVANS, CHARLES RAY"
replace bonicarid="cand551_2" if bonicarid=="cand551" & name=="smith, dennis c w"
replace bonicarid="cand551_2" if bonicarid=="cand551" & name=="SMITH, DENNIS C W"
replace bonicarid="cand839_2" if bonicarid=="cand839" & name=="lynch, stephen a"
replace bonicarid="cand9996708" if bonicarid=="cand1082" & name=="chu, kansen"
replace bonicarid="cand1220_2" if bonicarid=="cand1220" & name=="ANDREWS, CAMILLE SPINELLO"
replace bonicarid="cand1407_2" if bonicarid=="cand1407" & name=="stockman, draft"
replace bonicarid="cand1521_2" if bonicarid=="cand1521" & name=="fletcher, wynetta wendy"
replace bonicarid="cand1620_2" if bonicarid=="cand1620" & name=="faircloth, resson oliver jr"
replace bonicarid="cand3303_2" if bonicarid=="cand3303" & name=="kearns, don"
replace bonicarid="cand44995" if bonicarid=="cand140862"
replace bonicarid="cand160674" if icpsr2=="21525" & cycle==2020

* Note: some bonicarid have multiple bonicacid. But I checked manually and
* either these are not cases involving cases in Contributions_Matched.dta,
* or in case they are these are not self-donations

collapse (min) bonicacid, by(bonicarid)
rename bonicacid bonica_cid_2
save "temp_recipient_2.dta", replace


*** MERGE BOTH TEMP DATASETS TO OBTAIN ICPSR2 AND BONICACID ***

use "${dir}/Data/Matched Datasets/Contributions_Matched.dta", clear

/*
REPLACE THESE MISTAKES IN BONICA DATA: different icpsr2 and bonicarid assigned to the same person
*/
replace bonicarid="cand101855" if bonicarid=="cand142752"
/*
REPLACE THESE MISTAKES IN BONICA DATA FOR CANDIDATES (bonicarid is different for these candidates between contributions datasests and dime_recipients_all_1979_2018, or it is missing in contributions datasets)
*/
replace bonicarid="cand118157" if bonicarid=="cand140869"
replace bonicarid="cand137579" if bonicarid=="cand141325"
replace bonicarid="cand101855" if bonicarid=="cand152881"
replace bonicarid="cand143446" if bonicarid=="cand27327"
replace bonicarid="cand35025" if recipientname=="PORTANTINO, ANTHONY"
replace bonicarid="cand35025" if recipientname=="portantino, anthony"
replace bonicarid="cand35215" if recipientname=="SIMITIAN, S. JOSEPH"
replace bonicarid="cand35215" if recipientname=="simitian, s joseph"
replace bonicarid="cand35215" if recipientname=="friends of simitian, "
replace bonicarid="cand35215" if recipientname=="simitian for california state assembly, "
replace bonicarid="cand35215" if recipientname=="simitian for state senate, "
replace bonicarid="cand809" if recipientname=="TAYLOR, GENE"
replace recipientstate="MS" if recipientname=="TAYLOR, GENE"
replace bonicarid="cand9998006" if recipientname=="estes, dawn"
replace bonicarid="cand28614" if recipientname=="jones, dave"
replace bonicarid="cand143845" if recipientname=="perez, john a"
replace bonicarid="cand9996636" if recipientname=="weinreb, brad"
replace bonicarid="cand9993355" if recipientname=="cohn, steve"
replace bonicarid="cand9999143" if recipientname=="johannessen, mark"
replace bonicarid="cand28713" if recipientname=="krekorian, paul"
/*
REPLACE THESE MISTAKES IN BONICA DATA FOR CANDIDATES: same bonicarid assigned to 2 different candidates
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand1383_2" if bonicarid=="cand1383" & recipientname=="JONES, WALTER"
replace bonicarid="cand767_2" if bonicarid=="cand767" & recipientname=="DUNCAN, JOHN J"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="MALONEY, SEAN PATRICK"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="SEAN PATRICK MALONEY FOR CONGRESS"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="friends of maloney"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="maloney, sean patrick"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="sean patrick maloney for congress"
replace bonicarid="cand1267_2" if bonicarid=="cand1267" & recipientname=="BROWN, ALVIN"
replace bonicarid="cand43796_2" if bonicarid=="cand43796" & recipientname=="meng, matt"
replace bonicarid="cand131777" if bonicarid=="H4NY12029" & recipientname=="VELAZQUEZ, PEDRO L"
replace bonicarid="cand83453" if bonicarid=="cand131015" & recipientname=="MASTO, CATHERINE CORTEZ"
replace bonicarid="cand57085" if bonicarid=="cand105249" & recipientname=="SHARRATT, BRYAN EDWARDS"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & recipientname=="bartlett, dewey follett"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & recipientname=="bartlett, dewey f jr"
replace bonicarid="cand241_2" if bonicarid=="cand241" & recipientname=="early, joseph d jr"
replace bonicarid="cand271_2" if bonicarid=="cand271" & recipientname=="MCDONALD, KATHRYN JACKSON"
replace bonicarid="cand323_2" if bonicarid=="cand323" & recipientname=="corcoran, thomas e"
replace bonicarid="cand323_2" if bonicarid=="cand323" & recipientname=="CORCORAN, THOMAS E"
replace bonicarid="cand393_2" if bonicarid=="cand393" & recipientname=="ATKINSON, FREDERICK WAYNE"
replace bonicarid="cand36009" if bonicarid=="cand507" & recipientname=="EVANS, CHARLES RAY"
replace bonicarid="cand551_2" if bonicarid=="cand551" & recipientname=="smith, dennis c w"
replace bonicarid="cand551_2" if bonicarid=="cand551" & recipientname=="SMITH, DENNIS C W"
replace bonicarid="cand839_2" if bonicarid=="cand839" & recipientname=="lynch, stephen a"
replace bonicarid="cand9996708" if bonicarid=="cand1082" & recipientname=="chu, kansen"
replace bonicarid="cand1220_2" if bonicarid=="cand1220" & recipientname=="ANDREWS, CAMILLE SPINELLO"
replace bonicarid="cand1407_2" if bonicarid=="cand1407" & recipientname=="stockman, draft"
replace bonicarid="cand1521_2" if bonicarid=="cand1521" & recipientname=="fletcher, wynetta wendy"
replace bonicarid="cand1620_2" if bonicarid=="cand1620" & recipientname=="faircloth, resson oliver jr"
replace bonicarid="cand3303_2" if bonicarid=="cand3303" & recipientname=="kearns, don"

/*
REPLACE THESE MISTAKES IN BONICA DATA: different bonicarid assigned to the same person
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand44995" if bonicarid=="cand140862"

merge m:1 bonicarid using "temp_recipient.dta" /* Note: 0.1% of observations unmatched (missing bonicarid) */
drop if _m==2
drop _m

merge m:1 bonicarid using "temp_recipient_2.dta" /* Note: 0.1% of observations unmatched (missing bonicarid) */
drop if _m==2
drop _m

* Party switchers
replace icpsr2="20327" if icpsr2=="90327"
replace icpsr2="29440" if icpsr2=="29940"
replace icpsr2="29735" if icpsr2=="41105"
replace icpsr2="20101" if icpsr2=="41100"
replace icpsr2="20524" if icpsr2=="21169"
replace icpsr2="29542" if icpsr2=="99542"
replace icpsr2="29767" if icpsr2=="89767"
replace icpsr2="29767" if icpsr2=="99767"
replace icpsr2="20901" if icpsr2=="90901"
replace icpsr2="14828" if icpsr2=="94828"
replace icpsr2="29722" if icpsr2=="41103"
replace icpsr2="21144" if icpsr2=="20725"

* generate a variable that tells me if a director donated to her own campaign
gen self_financing_donation=0
replace self_financing_donation=1 if transactiontype =="15C"
replace self_financing_donation=1 if transactiontype =="16C"
replace self_financing_donation=1 if transactiontype =="16H"
replace self_financing_donation=1 if transactiontype =="16K"
replace self_financing_donation=1 if transactiontype =="17Z"
replace self_financing_donation=1 if bonicacid==bonica_cid_2 & bonica_cid_2!=.
drop bonica_cid_2

* Total contributions by each director: distribution
bys DirectorID : egen tot_dir=total(amount)
bys DirectorID cycle: egen tot_dir_cycle=total(amount)

* Total contributions by each director - excluding donations to own campaign: distribution
gen temp=amount*self_financing_donation
bys DirectorID: egen total_self_financing=total(temp)
gen tot_dir_noself=tot_dir-total_self_financing
drop total_self_financing 
bys DirectorID cycle: egen total_self_financing=total(temp)
gen tot_dir_noself_cycle=tot_dir_cycle-total_self_financing
drop total_self_financing temp

* Generate the new amount variable (amount_noself, which exclude donations to one's own campaign)
gen amount_noself=0
replace amount_noself=amount if self_financing_donation!=1

* Label variables
label var cycle "Cycle"
label var transactionid "Bonica Transaction ID"
label var transactiontype "Bonica Transaction Type"
label var amount "Amount Donation"
label var amount_noself "Amount Donation (excluded to own campaign)"
label var date "Date donation"
label var bonicacid "Bonica Contributor ID"
label var recipientname "Recipient Name"
label var bonicarid "Bonica Recipient ID"
label var recipientparty "Bonica Recipient Party"
label var recipienttype "Bonica Recipient type (CAND/COMM)"
label var recipientstate "Bonica Recipient State"
label var seat "Seat of Race"
label var electiontype "Type of Election (General/Primary/Open)"
label var DirectorID "Director ID"
label var step_match "Step in which director and donor matched"
label var icpsr2 "ICPSR2 Code"
label var self_financing_donation "=1 if bonicacid=bonica_cid_2"
label var tot_dir "Director's Total Lifetime Donations"
label var tot_dir_noself "Director's Total Lifetime Donations (excluded to own campaign)"
label var tot_dir_cycle "Director's Total Donations in Cycle"
label var tot_dir_noself_cycle "Director's Total Lifetime Donations in Cycle (excluded to own campaign)"

drop beforeswitchicpsr afterswitchicpsr

* Save Dataset with all donations
order DirectorID cycle bonicacid transactionid amount seat bonicarid icpsr2 recipientname recipientparty recipienttype recipientstate electiontype date transactiontype self_financing_donation amount_noself tot_dir tot_dir_cycle tot_dir_noself tot_dir_noself_cycle
sort DirectorID cycle bonicarid
save "${dir}/Data/Final Datasets/Contributions_Final.dta", replace 

erase "temp_recipient.dta"
erase "temp_recipient_2.dta"




		*********************************************************************************************
		*********************************************************************************************
		*** 				STEP 7:  Clean Careers of Directors      			  			      ***
		*********************************************************************************************
		*********************************************************************************************

use "${dir}/Data/Boardex/Directors_to_Match.dta", clear

* Create year and month of Annual Report
gen year=substr(AnnualReportYear,-2,.)
replace year="20" if year=="nt"
replace year="20"+year if year!="99"
replace year="1999" if year=="99"
gen month=substr(AnnualReportYear,1,3)
replace month="01" if month=="Jan"
replace month="02" if month=="Feb"
replace month="03" if month=="Mar"
replace month="04" if month=="Apr"
replace month="05" if month=="May"
replace month="06" if month=="Jun"
replace month="07" if month=="Jul"
replace month="08" if month=="Aug"
replace month="09" if month=="Sep"
replace month="10" if month=="Oct"
replace month="11" if month=="Nov"
replace month="12" if month=="Dec"
replace month="13" if month=="Cur"
gen year_month=year+month
destring month, replace
destring year, replace 
destring year_month, replace

* Keep only years before 2018 since contributions data end in the 2018 election cycle
keep if year<=2018


/*

preserve
duplicates drop year CompanyID, force
bys CompanyID: gen N=_N
bys CompanyID: egen min=min(year)
duplicates drop CompanyID, force
tab N
tab min
restore

* How many years does a company appear in the data?

          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      1,037        6.97        6.97
          2 |      1,354        9.11       16.08
          3 |      1,763       11.86       27.94
          4 |      1,791       12.04       39.98
          5 |      1,449        9.74       49.72
          6 |      1,121        7.54       57.26
          7 |        907        6.10       63.36
          8 |        714        4.80       68.16
          9 |        525        3.53       71.69
         10 |        383        2.58       74.27
         11 |        622        4.18       78.45
         12 |        443        2.98       81.43
         13 |        317        2.13       83.56
         14 |        322        2.17       85.73
         15 |        413        2.78       88.51
         16 |        641        4.31       92.82
         17 |        107        0.72       93.54
         18 |        202        1.36       94.90
         19 |        702        4.72       99.62
         20 |         57        0.38      100.00
------------+-----------------------------------
      Total |     14,870      100.00


* Which is the first year firms appear in the dataset?

        min |      Freq.     Percent        Cum.
------------+-----------------------------------
       1999 |         84        0.56        0.56
       2000 |      1,460        9.82       10.38
       2001 |        349        2.35       12.73
       2002 |         74        0.50       13.23
       2003 |      1,757       11.82       25.04
       2004 |        862        5.80       30.84
       2005 |        517        3.48       34.32
       2006 |        417        2.80       37.12
       2007 |      1,018        6.85       43.97
       2008 |        677        4.55       48.52
       2009 |         95        0.64       49.16
       2010 |        251        1.69       50.85
       2011 |        630        4.24       55.08
       2012 |        774        5.21       60.29
       2013 |        874        5.88       66.17
       2014 |      1,148        7.72       73.89
       2015 |      1,445        9.72       83.60
       2016 |      1,248        8.39       92.00
       2017 |        656        4.41       96.41
       2018 |        534        3.59      100.00
------------+-----------------------------------
      Total |     14,870      100.00


*/



* Create variable that gives the lag between a report year and the last one
preserve
duplicates drop AnnualReportYear CompanyID, force
sort CompanyID year month
by CompanyID: gen lag_report=year-year[_n-1]	
sort CompanyID year month
by CompanyID: gen lag_report_2=(lag_report[_n+1]!=. & lag_report[_n+1]>=2)
keep AnnualReportYear CompanyID	 lag_report lag_report_2 year_month
save "temp.dta", replace
restore	
merge m:1 AnnualReportYear CompanyID using "temp.dta"
drop _m
erase "temp.dta"
tab lag_report
/*
	  
 lag_report |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     15,181        0.72        0.72
          1 |  2,055,463       97.50       98.22
          2 |     36,619        1.74       99.96
          3 |        692        0.03       99.99
          4 |        104        0.00      100.00
          5 |          4        0.00      100.00
          6 |         10        0.00      100.00
------------+-----------------------------------
      Total |  2,108,073      100.00

	  

* more than 98% of observations are of companies that appear in two consecutive years (or twice in the same year) 
*/

* Save dataset with observations with lag=0 or 1
preserve
keep if lag_report==. | lag_report==0 | lag_report==1
save "${dir}/Data/Boardex/Data_careers_1.dta", replace
restore

* For observations with lag>1, consider them in the firm in the years in between two years if they appear in the firm in both the years
preserve
keep if lag_report!=. & lag_report!=0 & lag_report!=1
save "${dir}/Data/Boardex/Data_careers_2.dta", replace
restore
keep if (lag_report!=. & lag_report>=2) | (lag_report_2==1)
sort CompanyID DirectorID year month
by CompanyID: gen onboard=(DirectorID==DirectorID[_n-1] & year==year[_n-1]+lag_report)
keep if onboard==1
keep if lag_report>=2
drop AnnualReportYear TotalDirectorsontheBoard NumberofSDsonBoard onboard month year_month  lag_report_2
expand lag_report, gen(new)
keep if new==1
bys CompanyID DirectorID year: gen n=_n
gen new_year=year-n
drop year lag_report new n
rename new_year year

* Append datasets together
append using "${dir}/Data/Boardex/Data_careers_2.dta"
append using "${dir}/Data/Boardex/Data_careers_1.dta"

keep CompanyID CompanyName year month AnnualReportYear DirectorID IndividualName Sector CompanyISIN CompanyTicker CompanyIndex TotalDirectorsontheBoard NumberofSDsonBoard DirectorTypeEDorSD Board NationalityMix year_month IndividualRole
sort CompanyID year month DirectorID
order CompanyID CompanyName year month AnnualReportYear DirectorID IndividualName Sector CompanyISIN CompanyTicker CompanyIndex TotalDirectorsontheBoard NumberofSDsonBoard DirectorTypeEDorSD Board NationalityMix year_month IndividualRole

erase "${dir}/Data/Boardex/Data_careers_1.dta"
erase "${dir}/Data/Boardex/Data_careers_2.dta"


		*********************************************************************************************
		*********************************************************************************************
		*** 				STEP 8:  Assign directors to the election cycles     			      ***
		*********************************************************************************************
		*********************************************************************************************
		
		
* Eliminates cases of firms having multiple annual reports in same year
duplicates drop CompanyID DirectorID year, force

gen cycle=.
replace cycle=2000 if year==1999 | year==2000
replace cycle=2002 if year==2001 | year==2002
replace cycle=2004 if year==2003 | year==2004
replace cycle=2006 if year==2005 | year==2006
replace cycle=2008 if year==2007 | year==2008
replace cycle=2010 if year==2009 | year==2010
replace cycle=2012 if year==2011 | year==2012
replace cycle=2014 if year==2013 | year==2014
replace cycle=2016 if year==2015 | year==2016
replace cycle=2018 if year==2017 | year==2018


* Variable that tells me for how many years an individual was in the firm in that cycle (1 or 2)
bys CompanyID DirectorID cycle: gen years_on_board_cycle=_N
/*
preserve
duplicates drop CompanyID DirectorID cycle, force
tab years_on_board_cycle
restore

years_on_bo |
  ard_cycle |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    486,312       33.61       33.61
          2 |    960,686       66.39      100.00
------------+-----------------------------------
      Total |  1,446,998      100.00

	  * 2/3 are in both years of a cycle
*/

keep CompanyID CompanyName DirectorID IndividualName Sector CompanyISIN CompanyTicker CompanyIndex DirectorTypeEDorSD Board NationalityMix cycle years_on_board_cycle IndividualRole

* keep only one observation for each director-company-cycle
duplicates drop CompanyID DirectorID cycle, force

* save final dataset
sort DirectorID CompanyID cycle
order DirectorID CompanyID cycle IndividualName CompanyName Sector CompanyISIN CompanyTicker CompanyIndex DirectorTypeEDorSD Board NationalityMix years_on_board_cycle IndividualRole

save "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned.dta", replace



		*********************************************************************************************
		*********************************************************************************************
		*** 			    	STEP 9:  Obtain Company Information     		        	      ***
		*********************************************************************************************
		*********************************************************************************************

* Import information on the companies in the core dataset		
import excel "${dir}/Data/Boardex/Boardex original/NA - Company Details/NA - Company Details.xlsx", first clear
keep CompanyID HOAddress1 HOAddress2 HOAddress3 HOAddress4 HOAddress5 HOURL CIKCode
save "temp.dta", replace	

* Open Compustat data (in WRDS is found at "Get Data/ Compustat - Capital IQ / Compustat / North America - Daily / Compustat Daily Updates - Fundamentals Annual")
* downloaded on May 29 2020
* see dataset to see which variables I downloaded
* I renamed the dataset Compustat_SIC_Codes.dta
use "${dir}/Data/WRDS/Compustat_SIC_Codes.dta", clear
keep gvkey tic conm cik sic
rename conm company_name_compustat
rename tic CompanyTicker
replace cik="0001303363" if cik=="001303363" /* 1 company cik is missing a zero in the code */
* save version of datasets with cik
preserve
keep if cik!=""
drop CompanyTicker
duplicates drop cik, force
save "temp2.dta", replace	
restore
* save version of datasets with ticker
preserve
keep if CompanyTicker!=""
drop cik
duplicates drop CompanyTicker, force
save "temp3.dta", replace	
restore

* CIK TO SIC FROM SEC FILINGS 
* Data downloaded on May 29 2020 from https://www.sec.gov/divisions/corpfin/organization/cfia.shtml
import excel "${dir}/Data/SEC/SECfilings_sic.xlsx", first clear
rename CompanyName Company_Name_SECfilings
rename CIKNumber cik
rename SICCode sic
tostring cik, replace
gen l=length(cik)
replace cik="00000000"+cik if l==2
replace cik="000000"+cik if l==4
replace cik="00000"+cik if l==5
replace cik="0000"+cik if l==6
replace cik="000"+cik if l==7
drop l
tostring sic, replace
gen l=length(sic)
replace sic="" if l==1
replace sic="0"+sic if l==3
drop l
save "temp4.dta", replace	

* Merge to dataset
use "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned.dta", clear
merge m:1 CompanyID using "temp.dta"
drop if _m==2

*** ONLY 16/14870 COMPANIES HAVE NO DETAILED INFORMATION IN BOARDEX
/*
tab _m

                 _merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
        master only (1) |        973        0.07        0.07
            matched (3) |  1,446,025       99.93      100.00
------------------------+-----------------------------------
                  Total |  1,446,998      100.00


unique CompanyID if _m==1
Number of unique values of CompanyID is  16
Number of records is  973

unique CompanyID if _m==3
Number of unique values of CompanyID is  14854
Number of records is  1446025

*/
drop _m

* make cik code in same format as in Compustat
rename CIKCode cik
tostring cik, replace
gen l=length(cik)
replace cik="00000000"+cik if l==2
replace cik="000000"+cik if l==4
replace cik="00000"+cik if l==5
replace cik="0000"+cik if l==6
replace cik="000"+cik if l==7
replace cik=""+cik if l==1
drop l

* merge with Compustat using cik
merge m:1 cik using "temp2.dta"
drop if _m==2

preserve
keep if _m==3
drop _m
save "found_1.dta", replace
restore

* merge with Compustat using Ticker (Note: Boardex reports multiple ticker codes for companies that change ticker over time
* but to be conservative I rely on ticker only for those companies not changing ticker code, so here we will have matches only if Boardex reports
* only unique ticker code over the sample period).
keep if _m==1
drop _m sic
merge m:1 CompanyTicker using "temp3.dta"
drop if _m==2

preserve
keep if _m==3
drop _m
save "found_2.dta", replace
restore

* merge with SEC filings data using cik
keep if _m==1
drop _m sic
merge m:1 cik using "temp4.dta"
drop if _m==2
drop _m

* Append all of them
append using "found_1.dta"
append using "found_2.dta"

*64% of firms and 74% of observations have a SIC code

* save final dataset
save "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC.dta", replace

erase "temp.dta"		
erase "temp2.dta"		
erase "temp3.dta"		
erase "temp4.dta"		
erase "found_1.dta"
erase "found_2.dta"		



		*********************************************************************************************
		*********************************************************************************************
		*** 				STEP 10:  Clean US House and Senate Committee Data     			      ***
		*********************************************************************************************
		*********************************************************************************************



*******************************************************		
* Committee assignment data, house 106-115 Congresses *
*******************************************************

* Note: I added to the file downloaded from Stewart's website the following missing MC in the 115th Congress:
* icpsr2 21370, "Pocan, Mark" in the appropriations committee (code 104)


import excel using "${dir}/Data/Federal Committee Assignmnents/house_assignments_103-115-3.xls", first clear
keep Congress Committeecode ID Name MajMin Party DateofAssignment DateofTermination SeniorPartyMember
* generate majority dummy
gen majority=(MajMin==1 | MajMin==4 | MajMin==6)
* generate chairman and ranking minority member
gen chairman=(SeniorPartyMember==11 | SeniorPartyMember==12 | SeniorPartyMember==13 | SeniorPartyMember==14 | SeniorPartyMember==16)
gen ranking_minority=(SeniorPartyMember==21 | SeniorPartyMember==22 | SeniorPartyMember==23 | SeniorPartyMember==24)
gen speaker_house=(SeniorPartyMember==31 | SeniorPartyMember==32 | SeniorPartyMember==33)
gen majority_leader=(SeniorPartyMember==41 | SeniorPartyMember==42 | SeniorPartyMember==43 | SeniorPartyMember==44)
gen majority_whip=(SeniorPartyMember==51 | SeniorPartyMember==52 | SeniorPartyMember==53)
gen minority_leader=(SeniorPartyMember==61 | SeniorPartyMember==62 | SeniorPartyMember==63)
gen minority_whip=(SeniorPartyMember==64 | SeniorPartyMember==65 | SeniorPartyMember==66)
drop SeniorPartyMember

* Generate election cycle corresponing to the various congresses
gen cycle=.

replace cycle=2000 if Congress==106
replace cycle=2002 if Congress==107
replace cycle=2004 if Congress==108
replace cycle=2006 if Congress==109
replace cycle=2008 if Congress==110
replace cycle=2010 if Congress==111
replace cycle=2012 if Congress==112
replace cycle=2014 if Congress==113
replace cycle=2016 if Congress==114
replace cycle=2018 if Congress==115

drop if Congress==. | Congress<106

* Rename variables
rename ID icpsr2
drop if icpsr2==.
rename Name name
rename Committeecode committeecode

* Drop a few cases of double observtions for the same icpsr-committee-cycle
drop if cycle==2010 & icpsr2==29315 & committeecode==124 & ranking_minority==1
drop if cycle==2010 & icpsr2==20955 & committeecode==124 & majority==0
drop if cycle==2008 & icpsr2==20131 & committeecode==128 & majority==1

duplicates drop cycle icpsr2 committeecode, force

* Typos and party switchers
replace icpsr2=20959 if name=="Deutch, Theodore E."
replace icpsr2=20327 if icpsr2==90327
replace icpsr2=29440 if icpsr2==29940
replace icpsr2=29735 if icpsr2==41105
replace icpsr2=20101 if icpsr2==41100
replace icpsr2=20524 if icpsr2==21169
replace icpsr2=29542 if icpsr2==99542
replace icpsr2=29767 if icpsr2==89767
replace icpsr2=29767 if icpsr2==99767
replace icpsr2=20901 if icpsr2==90901
replace icpsr2=14828 if icpsr2==94828
replace icpsr2=29722 if icpsr2==41103
replace icpsr2=21144 if icpsr2==20725
replace icpsr2=41308 if icpsr2==41208
replace icpsr2=20758 if icpsr2==15006 & name=="Bilirakis, Gus"
replace icpsr2=29550 if icpsr2==21161
replace icpsr2=21335 if icpsr2==15427 & name=="Kennedy, Joseph"
replace icpsr2=21185 if icpsr2==39037 & name=="Rigell, Scott"
replace icpsr2=15634 if icpsr2==15364 
replace icpsr2=21152 if icpsr2==21535 
replace icpsr2=21110 if icpsr2==21100 & name=="Bass, Karen"

drop Congress

* Generate dataset with congress leaders
preserve
keep if  speaker_house==1 |  majority_leader==1 | majority_whip==1 |  minority_leader==1 |  minority_whip==1
keep icpsr2 cycle speaker_house - minority_whip name majority Party
sort icpsr2 cycle
save "temp_leadership.dta", replace
restore

* Generate dataset with committee top members
preserve
keep if speaker_house==0 &  majority_leader==0 & majority_whip==0 &  minority_leader==0 &  minority_whip==0
keep icpsr2 cycle committeecode chairman ranking_minority Party
keep if chairman==1 | ranking_minority==1
save "temp_committee.dta", replace
restore 

* Reshape dataset to make it at the congress-MC level
keep if speaker_house==0 &  majority_leader==0 & majority_whip==0 &  minority_leader==0 &  minority_whip==0
keep icpsr2 committeecode cycle name majority Party
bys icpsr2 cycle: gen N=_N
qui: sum N
local max `r(max)'
forvalues x=1(1)`max' {
gen committee_`x'=.
}
bys icpsr2 cycle: gen n=_n
forvalues x=1(1)`max' {
replace committee_`x'=committeecode if n==`x'
}
collapse (max) committee_1 - committee_`max' (first) name majority Party, by(icpsr2 cycle)

* Get final dataset 
preserve
forvalues x=1(1)`max' {
use "temp_committee.dta", clear
rename committeecode committee_`x'
sort icpsr2 cycle committee_`x'
save "temp_committee_`x'.dta", replace
}
restore

save "temp_start.dta", replace
forvalues x=1(1)`max' {
use "temp_start.dta", clear
sort icpsr2 cycle committee_`x'
merge icpsr2 cycle committee_`x' using "temp_committee_`x'.dta"
drop if _m==2
rename chairman chairman_`x'
rename ranking_minority ranking_minority_`x'
replace chairman_`x'=0 if _m==1
replace ranking_minority_`x'=0 if _m==1
drop _m
save "temp_start.dta", replace
}
forvalues x=1(1)`max' {
replace chairman_`x'=. if committee_`x'==.
replace ranking_minority_`x'=. if committee_`x'==.
}
sort icpsr2 cycle
merge icpsr2 cycle using "temp_leadership.dta"
foreach x in speaker_house majority_leader majority_whip minority_leader minority_whip {
replace `x'=0 if _m==1
}
drop _m

gen chamber="H"

save "committees_103_115.dta", replace

*******************************************************		
* Committee assignment data, Senate 106-115 Congresses *
******************************************************* 

import excel using "${dir}/Data/Federal Committee Assignmnents/senate_assignments_103-115-3.xls", first clear
keep Congress CommitteeCode ID Name MajMin PartyCode DateofAppointment DateofTermination SeniorPartyMember
* generate majority dummy
gen majority=(MajMin==1 | MajMin==4 | MajMin==6)
* generate chairman and ranking minority member (see codebook on Stewart III's website)
gen chairman=(SeniorPartyMember==11 | SeniorPartyMember==12 | SeniorPartyMember==13)
gen ranking_minority=(SeniorPartyMember==21 | SeniorPartyMember==22 | SeniorPartyMember==23)
gen majority_leader=(SeniorPartyMember==41 | SeniorPartyMember==42 | SeniorPartyMember==43 | SeniorPartyMember==44)
gen majority_whip=(SeniorPartyMember==51)
gen minority_leader=(SeniorPartyMember==61 | SeniorPartyMember==62 | SeniorPartyMember==63)
gen minority_whip=(SeniorPartyMember==64 | SeniorPartyMember==65 | SeniorPartyMember==66)
drop SeniorPartyMember

* Generate election cycle corresponing to the various congresses
gen cycle=.

replace cycle=2000 if Congress==106
replace cycle=2002 if Congress==107
replace cycle=2004 if Congress==108
replace cycle=2006 if Congress==109
replace cycle=2008 if Congress==110
replace cycle=2010 if Congress==111
replace cycle=2012 if Congress==112
replace cycle=2014 if Congress==113
replace cycle=2016 if Congress==114
replace cycle=2018 if Congress==115

drop if Congress==. | Congress<106

* Rename variables
rename ID icpsr2
drop if icpsr2==.
rename Name name
rename CommitteeCode committeecode
rename PartyCode Party

* Drop a few cases of double observtions for the same icpsr-committee-cycle
drop if cycle==2010 & icpsr2==14910 & committeecode==306 & majority==0
drop if cycle==2010 & icpsr2==14910 & committeecode==358 & majority==0
drop if cycle==2010 & icpsr2==14910 & committeecode==388 & majority==0
drop if cycle==2010 & icpsr2==14910 & committeecode==419 & majority==0
drop if cycle==2010 & icpsr2==14910 & committeecode==332 & majority==0
drop if cycle==2014 & icpsr2==49700 & committeecode==332 & majority==1
drop if cycle==2002 & icpsr2==14240 & committeecode==336 & majority==0
drop if cycle==2018 & icpsr2==94659 & committeecode==380 & chairman==0

* Senate has democratic majority for 1.5 years of the 107 Congress, and Republican for .5 years: senate leaders enter twice, both as majority and as minority
* Assign Majority to Democrats as the senate leaders switch back and forth
drop if icpsr2==14031 & cycle==2002 & majority_leader==1
drop if icpsr2==14617 & cycle==2002 & minority_leader==1
drop if icpsr2==14908 & cycle==2002 & majority_whip==1
drop if icpsr2==15054 & cycle==2002 & minority_whip==1

duplicates drop cycle icpsr2 committeecode, force

* Typos and party switchers
replace icpsr2=20959 if name=="Deutch, Theodore E."
replace icpsr2=20327 if icpsr2==90327
replace icpsr2=29440 if icpsr2==29940
replace icpsr2=29735 if icpsr2==41105
replace icpsr2=20101 if icpsr2==41100
replace icpsr2=20524 if icpsr2==21169
replace icpsr2=29542 if icpsr2==99542
replace icpsr2=29767 if icpsr2==89767
replace icpsr2=29767 if icpsr2==99767
replace icpsr2=20901 if icpsr2==90901
replace icpsr2=14828 if icpsr2==94828
replace icpsr2=29722 if icpsr2==41103
replace icpsr2=21144 if icpsr2==20725
replace icpsr2=41308 if icpsr2==41208
replace icpsr2=20758 if icpsr2==15006 & name=="Bilirakis, Gus"
replace icpsr2=29550 if icpsr2==21161
replace icpsr2=21335 if icpsr2==15427 & name=="Kennedy, Joseph"
replace icpsr2=21185 if icpsr2==39037 & name=="Rigell, Scott"
replace icpsr2=15634 if icpsr2==15364 
replace icpsr2=21152 if icpsr2==21535 
replace icpsr2=21110 if icpsr2==21100 & name=="Bass, Karen"

drop Congress


* Generate dataset with congress leaders
preserve
keep if  majority_leader==1 | majority_whip==1 |  minority_leader==1 |  minority_whip==1
keep icpsr2 cycle majority_leader - minority_whip name majority Party
sort icpsr2 cycle
save "temp_leadership.dta", replace
restore

* Generate dataset with committee top members
preserve
keep if  majority_leader==0 & majority_whip==0 &  minority_leader==0 &  minority_whip==0
keep icpsr2 cycle committeecode chairman ranking_minority Party
keep if chairman==1 | ranking_minority==1
save "temp_committee.dta", replace
restore 

* Reshape dataset to make it at the congress-MC level
keep if  majority_leader==0 & majority_whip==0 &  minority_leader==0 &  minority_whip==0
keep icpsr2 committeecode cycle name majority Party
bys icpsr2 cycle: gen N=_N
qui: sum N
local max `r(max)'
forvalues x=1(1)`max' {
gen committee_`x'=.
}
bys icpsr2 cycle: gen n=_n
forvalues x=1(1)`max' {
replace committee_`x'=committeecode if n==`x'
}
collapse (max) committee_1 - committee_`max' (first) Party name majority, by(icpsr2 cycle)

preserve
forvalues x=1(1)`max' {
use "temp_committee.dta", clear
rename committeecode committee_`x'
sort icpsr2 cycle committee_`x'
save "temp_committee_`x'.dta", replace
}
restore
save "temp_start.dta", replace
forvalues x=1(1)`max' {
use "temp_start.dta", clear
sort icpsr2 cycle committee_`x'
merge icpsr2 cycle committee_`x' using temp_committee_`x'
drop if _m==2
rename chairman chairman_`x'
rename ranking_minority ranking_minority_`x'
replace chairman_`x'=0 if _m==1
replace ranking_minority_`x'=0 if _m==1
drop _m
save "temp_start.dta", replace
}
forvalues x=1(1)`max' {
replace chairman_`x'=. if committee_`x'==.
replace ranking_minority_`x'=. if committee_`x'==.
}
sort icpsr2 cycle
merge icpsr2 cycle using "temp_leadership.dta"
foreach x in majority_leader majority_whip minority_leader minority_whip {
replace `x'=0 if _m==1
}
drop _m

gen chamber="S"


*******************************************************		
*          Append House and Senate together           *
*******************************************************

append using "committees_103_115.dta"

* 5 MCs served in both H and S in same cycle: we keep them both, one obersvation for each chamber

* Make icpsr a string to match to other dataset
tostring icpsr2, replace


* label variables
label var icpsr2 "icpsr2"
label var cycle "electoral cycle"
label var chamber "chamber, H or S"
forvalues x=1(1)8 {
label var committee_`x' "committee code `x'"
label var chairman_`x' "chairman of committee `x'"
label var ranking_minority_`x' "ranking minority member of committee `x'"
}
label var name "name of MC in Committee Data"
label var majority_leader "majority leader"
label var majority_whip "majority whip"
label var minority_leader "minority leader"
label var minority_whip "minority whip"
label var speaker_house "speaker of the House"
label var majority "Majority in Chamber"
label var Party "Party code"

* Save final dataset
sort icpsr2 cycle
save "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", replace

erase "committees_103_115.dta"
forvalues x=1(1)8 {
erase "temp_committee_`x'.dta"
}
erase "temp_committee.dta"
erase "temp_leadership.dta"
erase "temp_start.dta"


************************************************************		
* SUBCommittee assignment data, House 106-115 Congresses   *
************************************************************ 

forvalues x=106(1)115 {
import excel using "${dir}/Data/Federal Committee Assignmnents/subcommittee_assignment.xlsx", sheet("House `x'") first clear	
drop if Committee==""
forvalues i=1(1)33 {
capture destring memb`i', replace
}
rename Committee_code Committee_Code
reshape long memb, i(Committee_Code) j(n)
gen chairman=(n==1)
gen ranking_minority=(n==2)
rename memb icpsr2
drop n
drop if icpsr2==.
replace Committee_Code=Committee_Code+"_`x'"
gen Congress=`x'
save "temp_`x'.dta", replace
}

use "temp_106.dta", clear
forvalues x=107(1)115 {
append using "temp_`x'.dta"
}
forvalues x=106(1)115 {
erase "temp_`x'.dta"
}

* Generate election cycle corresponing to the various congresses
gen cycle=.
replace cycle=2000 if Congress==106
replace cycle=2002 if Congress==107
replace cycle=2004 if Congress==108
replace cycle=2006 if Congress==109
replace cycle=2008 if Congress==110
replace cycle=2010 if Congress==111
replace cycle=2012 if Congress==112
replace cycle=2014 if Congress==113
replace cycle=2016 if Congress==114
replace cycle=2018 if Congress==115
drop Congress
drop Committee

* Typos and party switchers
replace icpsr2=20327 if icpsr2==90327
replace icpsr2=29440 if icpsr2==29940
replace icpsr2=29735 if icpsr2==41105
replace icpsr2=20101 if icpsr2==41100
replace icpsr2=20524 if icpsr2==21169
replace icpsr2=29542 if icpsr2==99542
replace icpsr2=29767 if icpsr2==89767
replace icpsr2=29767 if icpsr2==99767
replace icpsr2=20901 if icpsr2==90901
replace icpsr2=14828 if icpsr2==94828
replace icpsr2=29722 if icpsr2==41103
replace icpsr2=21144 if icpsr2==20725
replace icpsr2=41308 if icpsr2==41208
replace icpsr2=29550 if icpsr2==21161
replace icpsr2=15634 if icpsr2==15364 
replace icpsr2=21152 if icpsr2==21535 

* Generate dataset with subcommittee top members
preserve
keep if chairman==1 | ranking_minority==1
save "temp_committee.dta", replace
restore 

* Reshape dataset to make it at the congress-MC level
keep icpsr2 Committee_Code cycle
bys icpsr2 cycle: gen N=_N
qui: sum N
local max `r(max)'
forvalues x=1(1)`max' {
gen Committee_Code_`x'=""
}
bys icpsr2 cycle: gen n=_n
forvalues x=1(1)`max' {
replace Committee_Code_`x'=Committee_Code if n==`x'
}
collapse (firstnm) Committee_Code_1 - Committee_Code_`max' , by(icpsr2 cycle)

* Get final dataset 
preserve
forvalues x=1(1)`max' {
use "temp_committee.dta", clear
rename Committee_Code Committee_Code_`x'
sort icpsr2 cycle Committee_Code_`x'
save "temp_committee_`x'.dta", replace
}
restore

save "temp_start.dta", replace
forvalues x=1(1)`max' {
use "temp_start.dta", clear
sort icpsr2 cycle Committee_Code_`x'
merge icpsr2 cycle Committee_Code_`x' using "temp_committee_`x'.dta"
drop if _m==2
rename chairman chairman_`x'
rename ranking_minority ranking_minority_`x'
replace chairman_`x'=0 if _m==1
replace ranking_minority_`x'=0 if _m==1
drop _m
save "temp_start.dta", replace
}
forvalues x=1(1)`max' {
replace chairman_`x'=. if Committee_Code_`x'==""
replace ranking_minority_`x'=. if Committee_Code_`x'==""
}

gen chamber="H"

save "SUBcommittees_103_115.dta", replace


************************************************************		
* SUBCommittee assignment data, Senate 106-115 Congresses   *
************************************************************ 

forvalues x=106(1)115 {
import excel using "${dir}/Data/Federal Committee Assignmnents/subcommittee_assignment.xlsx", sheet("Senate `x'") first clear	
drop if Committee==""
forvalues i=1(1)25 {
capture destring memb`i', replace
}
rename Committee_code Committee_Code
reshape long memb, i(Committee_Code) j(n)
gen chairman=(n==1)
gen ranking_minority=(n==2)
rename memb icpsr2
drop n
drop if icpsr2==.
replace Committee_Code=Committee_Code+"_`x'"
gen Congress=`x'
save "temp_`x'.dta", replace
}

use "temp_106.dta", clear
forvalues x=107(1)115 {
append using "temp_`x'.dta"
}
forvalues x=106(1)115 {
erase "temp_`x'.dta"
}

* Generate election cycle corresponing to the various congresses
gen cycle=.
replace cycle=2000 if Congress==106
replace cycle=2002 if Congress==107
replace cycle=2004 if Congress==108
replace cycle=2006 if Congress==109
replace cycle=2008 if Congress==110
replace cycle=2010 if Congress==111
replace cycle=2012 if Congress==112
replace cycle=2014 if Congress==113
replace cycle=2016 if Congress==114
replace cycle=2018 if Congress==115
drop Congress
drop Committee

* Typos and party switchers
replace icpsr2=20327 if icpsr2==90327
replace icpsr2=29440 if icpsr2==29940
replace icpsr2=29735 if icpsr2==41105
replace icpsr2=20101 if icpsr2==41100
replace icpsr2=20524 if icpsr2==21169
replace icpsr2=29542 if icpsr2==99542
replace icpsr2=29767 if icpsr2==89767
replace icpsr2=29767 if icpsr2==99767
replace icpsr2=20901 if icpsr2==90901
replace icpsr2=14828 if icpsr2==94828
replace icpsr2=29722 if icpsr2==41103
replace icpsr2=21144 if icpsr2==20725
replace icpsr2=41308 if icpsr2==41208
replace icpsr2=29550 if icpsr2==21161
replace icpsr2=15634 if icpsr2==15364 
replace icpsr2=21152 if icpsr2==21535 

* Generate dataset with subcommittee top members
preserve
keep if chairman==1 | ranking_minority==1
save "temp_committee.dta", replace
restore 

* Reshape dataset to make it at the congress-MC level
keep icpsr2 Committee_Code cycle
bys icpsr2 cycle: gen N=_N
qui: sum N
local max `r(max)'
forvalues x=1(1)`max' {
gen Committee_Code_`x'=""
}
bys icpsr2 cycle: gen n=_n
forvalues x=1(1)`max' {
replace Committee_Code_`x'=Committee_Code if n==`x'
}
collapse (firstnm) Committee_Code_1 - Committee_Code_`max' , by(icpsr2 cycle)

* Get final dataset 
preserve
forvalues x=1(1)`max' {
use "temp_committee.dta", clear
rename Committee_Code Committee_Code_`x'
sort icpsr2 cycle Committee_Code_`x'
save "temp_committee_`x'.dta", replace
}
restore

save "temp_start.dta", replace
forvalues x=1(1)`max' {
use "temp_start.dta", clear
sort icpsr2 cycle Committee_Code_`x'
merge icpsr2 cycle Committee_Code_`x' using "temp_committee_`x'.dta"
drop if _m==2
rename chairman chairman_`x'
rename ranking_minority ranking_minority_`x'
replace chairman_`x'=0 if _m==1
replace ranking_minority_`x'=0 if _m==1
drop _m
save "temp_start.dta", replace
}
forvalues x=1(1)`max' {
replace chairman_`x'=. if Committee_Code_`x'==""
replace ranking_minority_`x'=. if Committee_Code_`x'==""
}

gen chamber="S"


*******************************************************		
*          Append House and Senate together           *
*******************************************************

append using "SUBcommittees_103_115.dta"

* Make icpsr a string to match to other dataset
tostring icpsr2, replace

* label variables
label var icpsr2 "icpsr2"
label var cycle "electoral cycle"
label var chamber "chamber, H or S"
forvalues x=1(1)16 {
label var Committee_Code_`x' "subcommittee code `x'"
label var chairman_`x' "chairman of subcommittee `x'"
label var ranking_minority_`x' "ranking minority member of subcommittee `x'"
}

* Save final dataset
sort icpsr2 cycle
save "${dir}/Data/Federal Committee Assignmnents/SUBcommittee_assignments_final.dta", replace

erase "SUBcommittees_103_115.dta"
forvalues x=1(1)16 {
erase "temp_committee_`x'.dta"
}
erase "temp_committee.dta"
erase "temp_start.dta"

		*********************************************************************************************
		*********************************************************************************************
		*** 	STEP 11:  Calculate Lobbying Issues more covered by Industry-Year    		      ***
		*********************************************************************************************
		*********************************************************************************************


****************************************************************
*   Import Lobbying Data from Center for Responsive Politics   *
****************************************************************		

* Lobbying report data 
* Before importing it, I needed to make some changes in the text, otherwise "import delimited" does not work properly:
* replace | with "
* replace NATIONAL WOMEN"S LAW CENTER with NATIONAL WOMEN'S LAW CENTER
* replace (Formerly known as "Allen D. Freemyer Esq, PC") with (Formerly known as 'Allen D. Freemyer Esq, PC')
* replace MAERSK SEALAND ("MAERSK, INC") with MAERSK SEALAND ('MAERSK, INC')
* replace (FORMERLY KNOWN AS "SANOFI-AVENTIS U S INC ) with (FORMERLY KNOWN AS SANOFI-AVENTIS U S INC )
* replace Fleishman-Hillard Inc. (FleishmanHillard") with Fleishman-Hillard Inc. (FleishmanHillard)
* replace Blue Tusk Communications"F/K/A Edward Ward Blakely,Jr with Blue Tusk Communications F/K/A Edward Ward Blakely Jr
* replace Blue Tusk Communications F/K/A Edward Ward Blakely,Jr with Blue Tusk Communications F/K/A Edward Ward Blakely Jr
* replace CORPORACION DE EXPORTACIONES MEXICANAS, with CORPORACION DE EXPORTACIONES MEXICANAS
* replace Moss, McGee et al with Moss McGee et al
* replace F/K/A Edward Ward Blakely Jr"" with F/K/A Edward Ward Blakely Jr"
* replace ""Blue Tusk Communications F/K/A with "Blue Tusk Communications F/K/A
* a few the observations associated to the following firms are split into mulitple lines in the txt, and they need to be put into one manually: 
*	CORPORACION DE EXPORTACIONES MEXICANAS S.A. DE C.V. & MARVIN ROY FELDMAN
*	NATL ENVIRONMENTAL DEVELOPMENT ASSNS STATE & FED ENVIRONL RESPONSIBILITY PROJECT 
*	OUTDOOR ADVERTISERS ASSN OF AMERICA/AMERICAN COUNCIL OF HIGHWAY ADVERTISERS
*	McKenna Long & Aldridge (Afl.Org.ADOT) name chg only[FRIED SAND. PAUL. (AFFL. OR
*	NATIONAL NEIGHBORWORKS ASSOCIATION FORMERLY NATL NEIGHBORHOOD HOUSING NETWORK
*	CONFERENCE OF EDUCATIONAL ADMINISTRATORS OF SCHOOLS & PROGRAMS FOR THE DEAF
*	TEACHERS INSURANCE & ANNUITY ASSNOF AMERICA/COLLEGE RETIREMENT EQUITIES FUND
*	JAPAN AUTOMOBILE STANDARDS INTERNATIONALIZATION CENTER - WASHINGTON OFFICE
*	TRANSPORTACION MARITIMA MEXICANA/TRANSPORTACION FERROVIARIA MEXICANA (TFM)
*	ORGANIZATION FOR THE PROMO & ADVANCE OF SMALL TELECOMMUNICATIONS CO
*	BROTHERHOOD OF MAINTENANCE OF WAY EMP DIV OF THE INTL BROTHERHOOD OF TEAMSTERS
*	KENAN PATRICK JARBOE (SELF-EMPLOYED DOING BUSINESS AS JARBOE & ASSOCIATES)
*	Moss, McGee et al Moss, McGee et al
import delimited using "${dir}/Data/Lobbying CRP/lob_lobbying.txt", delimiter(",") clear

rename v1 uniqid
rename v2 registrant_law
rename v3 registrant
rename v4 isfirm
rename v5 client_raw
rename v6 client
rename v7 ultorg
rename v8 amount
rename v9 catcode
rename v10 source
rename v11 self
rename v12 includeNSFS
rename v13 use
rename v14 ind
rename v15 year
rename v16 type
rename v17 typelong
rename v18 affiliate

save "${dir}/Data/Lobbying CRP/lob_lobbying.dta", replace

* Issues lobbied data
* Before importing it, I needed to make some changes in the text, otherwise "import delimited" does not work properly:
* replace | with "
import delimited using "${dir}/Data/Lobbying CRP/lob_issue_NoSpecficIssue.txt", delimiter(",") clear

rename v1 SI_ID
rename v2 uniqid
rename v3 issueID
rename v4 issue
rename v5 year

save "${dir}/Data/Lobbying CRP/lob_issue_NoSpecficIssue.dta", replace

* Industry classification
import delimited using "${dir}/Data/Lobbying CRP/CRP_Categories.txt", varn(1) clear

save "${dir}/Data/Lobbying CRP/CRP_Categories.dta", replace

* Combine the 3 datasets
use "${dir}/Data/Lobbying CRP/lob_lobbying.dta", clear

drop if year==1998 | year==2019

joinby uniqid using "${dir}/Data/Lobbying CRP/lob_issue_NoSpecficIssue.dta", unm(master) /* Note: use joinby since same report can be for lobbying on multiple issues */

* Drop use="n" since these should not be used to obtain a list of issues or total expenditure
drop if use=="n"

/*

* a few reports are not matched to any issue (about 1% of those with a positive amount of expenditure)
* see here a discussion, it's just measurement error created by mistakes in transcription of reports
https://groups.google.com/forum/#!searchin/opensecrets-open-data/issues%7Csort:date/opensecrets-open-data/ooaO0IuuqGw/Po9ndiBFAQAJ

tab _m if amount >0

                       _merge |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
          only in master data |     18,647        1.09        1.09
both in master and using data |  1,694,414       98.91      100.00
------------------------------+-----------------------------------
                        Total |  1,713,061      100.00

*/

* Need to drop these reports since we have no issue for these, but measurement error should be clearly random
keep if _m==3
drop _m

* Merge with Industry description (after correction of a few typos)
gen t=upper(catcode)
replace catcode=t if catcode!=t
merge m:1 catcode using "${dir}/Data/Lobbying CRP/CRP_Categories.dta" 
drop if _m==2
drop t _m

* Quarter/half-year
replace type=lower(type)
gen f=substr(type, 1,1)
gen s=substr(type, 2,1)
gen quarter=.
forvalues i=1(1)4 {
replace quarter=`i' if f=="q" & s=="`i'"
}
replace quarter=2 if f=="m"
replace quarter=4 if f=="e"

* First, calculate the total amount lobbied by a firm in a given quarter/half-year (for this, need to use only obs with ind=="y")
* Then, calculate all the issues on which the firm lobbied in that quarter/half-year (for this, need to use all obs)
* Finally, assign to all issues on which the firm lobbies in the quarter/half-year an equal fraction of the total amount lobbied by the firm in the quarter/half-year
* Note a very small number of firms are associated with 2 (not 1) industry in a given quarter/half-year. For these, we cosider the firm-industry as the unit of analysis, having 2 observations for that firm in that quarter/half-year
preserve
	keep if ind=="y"
	duplicates drop uniqid, force
	collapse (sum) amount, by(client year quarter catcode)
	tempfile temp
	save "`temp'" , replace
restore 
keep client year quarter catcode catname issueID industry sector sectorlong catorder
duplicates drop client year quarter catcode issueID, force
merge m:1 client year quarter catcode using "`temp'"
keep if _m==3 /* drop the 0.4% of observations with zero amount spent by the firm in the quarter/half-year */
drop _m	

* assign 1/N amount to issues covered by the firm in the quarter/half-year (with N being tot num of issues in quarter/half-year)
bys client year quarter catcode: gen N=_N
replace amount=amount/N
drop N

gen cycle=.
replace cycle=2000 if year==1999 | year==2000
replace cycle=2002 if year==2001 | year==2002
replace cycle=2004 if year==2003 | year==2004
replace cycle=2006 if year==2005 | year==2006
replace cycle=2008 if year==2007 | year==2008
replace cycle=2010 if year==2009 | year==2010
replace cycle=2012 if year==2011 | year==2012
replace cycle=2014 if year==2013 | year==2014
replace cycle=2016 if year==2015 | year==2016
replace cycle=2018 if year==2017 | year==2018


* Normalize amount since numbers that are too big introduce mistakes in the following code
replace amount=amount/1000
replace amount=round(amount)

* For industry "other" keep only the category "Welfare & Social Work" which is the only category represented by the firms in the Boardex dataset
drop if industry=="Other" & catname!="Welfare & Social Work"

* I use the industry variable (the intermediate level of classification, with 92 unique industries 

/*
 I create 2 measures of total donations by industry-cycle-issue (first IS time-varying, second is constant over time)
	1) take top 1 most lobbied issue by an industry in the cycle (or issues if there are ties)
	2) take top 3 most lobbied issues by an industry over sample period
*/


* 1)
preserve
collapse (sum) amount (first) sector sectorlong catorder, by(industry cycle issueID)
replace amount=round(amount)
bys industry cycle: egen temp=max(amount)
keep if amount==temp
drop temp amount
save "${dir}/Data/Lobbying CRP/top_issues_definition_1.dta", replace
restore

* 2)
preserve
collapse (sum) amount (first) sector sectorlong catorder, by(industry issueID)
replace amount=round(amount)
gsort industry -amount
by industry: gen n=_n
keep if n<=3
drop n amount
save "${dir}/Data/Lobbying CRP/top_issues_definition_2.dta", replace
restore

		*********************************************************************************************
		*********************************************************************************************
		*** 				STEP 12:  Assign Sectors to CRP Sector Classification   		      ***
		*********************************************************************************************
		*********************************************************************************************

* Here, I need to assign sectors based on the classification in my data, either SIC or Boardex Sector categories, to the corresponding sector in the Center for Responsive Politics
* Classification, since this is the classification used for the lobbying data


* Insheet mapping between SIC Code and CRP Sector (from CRP forum: https://groups.google.com/forum/#!forum/opensecrets-open-data)
import excel using "${dir}/Data/Lobbying CRP/CRP_Industry_to_SIC.xlsx", first clear	
drop MultSIC Category
rename SICcode sic
gen temp=(sic<1000)
tostring sic, replace
replace sic = "0"+sic if temp==1
drop temp
save "${dir}/Data/Lobbying CRP/CRP_Industry_to_SIC.dta", replace	

* Merge to my dataset
use "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC.dta", clear

* Sic 4888 does not exist. Check by hand the sic of the 5 companies with this mistake
replace sic="4833" if CompanyID==416216320
replace sic="4833" if CompanyID==100063834
replace sic="2721" if CompanyID==2675212739
replace sic="4841" if CompanyID==46946790
replace sic="4841" if CompanyID==295195651

merge m:1 sic using "${dir}/Data/Lobbying CRP/CRP_Industry_to_SIC.dta"
drop if _m==2
drop _m
replace Catcode="" if Catcode =="N/A"
preserve
keep if sic!="" & Catcode !="" 
save "temp_matched_1.dta", replace
restore

* For the sic that were not found, match by hand the sic code to the CRP Sector
keep if (sic!="" & Catcode =="") | sic=="" 
replace Catcode="A1000" if sic=="0100"		
replace Catcode="A3000" if sic=="0200"		
replace Catcode="A4000" if sic=="0700"		
replace Catcode="A5000" if sic=="0800"		
replace Catcode="E1220" if sic=="1000"		
replace Catcode="E1220" if sic=="1040"		
replace Catcode="E1220" if sic=="1090"		
replace Catcode="E1210" if sic=="1220"		
replace Catcode="E1230" if sic=="1400"		
replace Catcode="B0000" if sic=="1520"		
replace Catcode="B0000" if sic=="1540"		
replace Catcode="B0000" if sic=="1600"		
replace Catcode="B3000" if sic=="1700"		
replace Catcode="G2100" if sic=="2000"				
replace Catcode="A2000" if sic=="2020"		
replace Catcode="G2100" if sic=="2030"		
replace Catcode="G2100" if sic=="2040"		
replace Catcode="G2100" if sic=="2050"		
replace Catcode="G2200" if sic=="2060"		
replace Catcode="G2100" if sic=="2070"		
replace Catcode="G2600" if sic=="2080"				
replace Catcode="G2100" if sic=="2090"		
replace Catcode="A1300" if sic=="2100"		
replace Catcode="M8000" if sic=="2200"			
replace Catcode="M3100" if sic=="2300"		
replace Catcode="M3100" if sic=="2320"		
replace Catcode="M3100" if sic=="2330"		
replace Catcode="M3100" if sic=="2340"		
replace Catcode="M3100" if sic=="2390"		
replace Catcode="A5000" if sic=="2400"		
replace Catcode="A5000" if sic=="2430"		
replace Catcode="M4100" if sic=="2510"		
replace Catcode="M4100" if sic=="2520"		
replace Catcode="M2300" if sic=="2540"		
replace Catcode="M4100" if sic=="2590"		
replace Catcode="A5200" if sic=="2600"		
replace Catcode="M7000" if sic=="2650"		
replace Catcode="M7000" if sic=="2670"		
replace Catcode="C1300" if sic=="2750"		
replace Catcode="C1300" if sic=="2780"		
replace Catcode="C1300" if sic=="2790"		
replace Catcode="M1000" if sic=="2800"		
replace Catcode="M1000" if sic=="2810"		
replace Catcode="M1500" if sic=="2820"		
replace Catcode="M1300" if sic=="2840"		
replace Catcode="M1000" if sic=="2860"		
replace Catcode="A4100" if sic=="2870"		
replace Catcode="M1000" if sic=="2890"		
replace Catcode="B5000" if sic=="2950"		
replace Catcode="E1160" if sic=="2990"		
replace Catcode="M1500" if sic=="3050"		
replace Catcode="M1500" if sic=="3060"		
replace Catcode="M1500" if sic=="3080"		
replace Catcode="M3200" if sic=="3100"		
replace Catcode="M3200" if sic=="3140"		
replace Catcode="M7200" if sic=="3220"		
replace Catcode="B5100" if sic=="3250"		
replace Catcode="M4000" if sic=="3260"		
replace Catcode="B5100" if sic=="3270"		
replace Catcode="B5100" if sic=="3290"		
replace Catcode="M2100" if sic=="3310"		
replace Catcode="M2000" if sic=="3320"		
replace Catcode="M2200" if sic=="3330"		
replace Catcode="M2000" if sic=="3350"		
replace Catcode="M5000" if sic=="3390"		
replace Catcode="M5100" if sic=="3420"		
replace Catcode="M5000" if sic=="3430"		
replace Catcode="M5000" if sic=="3440"		
replace Catcode="M5000" if sic=="3460"		
replace Catcode="M5200" if sic=="3470"		
replace Catcode="M5300" if sic=="3480"		
replace Catcode="M5000" if sic=="3490"		
replace Catcode="M2300" if sic=="3510"		
replace Catcode="M2300" if sic=="3530"		
replace Catcode="M2300" if sic=="3540"		
replace Catcode="M2300" if sic=="3550"		
replace Catcode="M2300" if sic=="3560"		
replace Catcode="C5110" if sic=="3570"	
replace Catcode="C5110" if sic=="3576"		
replace Catcode="M2300" if sic=="3580"		
replace Catcode="M2300" if sic=="3590"	
replace Catcode="M2300" if sic=="3600"		
replace Catcode="M2300" if sic=="3620"		
replace Catcode="M4300" if sic=="3630"	
replace Catcode="M6000" if sic=="3640"		
replace Catcode="C5000" if sic=="3670"		
replace Catcode="M2300" if sic=="3690"	
replace Catcode="T6100" if sic=="3730"		
replace Catcode="T1700" if sic=="3760"		
replace Catcode="T0000" if sic=="3790"	
replace Catcode="M5000" if sic=="3910"		
replace Catcode="M4000" if sic=="3950"		
replace Catcode="M0000" if sic=="3990"	
replace Catcode="T4000" if sic=="4100"		
replace Catcode="T3100" if sic=="4210"		
replace Catcode="T6200" if sic=="4400"	
replace Catcode="E1100" if sic=="4610"		
replace Catcode="T0000" if sic=="4700"		
replace Catcode="E3000" if sic=="4950"		
replace Catcode="E3000" if sic=="4955"
replace Catcode="E1630" if sic=="4991"		
replace Catcode="T2000" if sic=="5010"		
replace Catcode="B5000" if sic=="5030"
replace Catcode="M7000" if sic=="5110"		
replace Catcode="M3000" if sic=="5130"
replace Catcode="G2500" if sic=="5140"		
replace Catcode="M1000" if sic=="5160"		
replace Catcode="G2850" if sic=="5180"
replace Catcode="G2400" if sic=="5400"		
replace Catcode="G4600" if sic=="5412"
replace Catcode="G4100" if sic=="5600"
replace Catcode="G4400" if sic=="5700"
replace Catcode="G2900" if sic=="5810"		
replace Catcode="G4600" if sic=="5900"
replace Catcode="G4600" if sic=="5940"		
replace Catcode="G4850" if sic=="5960" & CompanyID==190857198
replace Catcode="G4800" if sic=="5960" & (CompanyID==22478165153 | CompanyID==12574875566)
replace Catcode="G4600" if sic=="5990"
replace Catcode="F1100" if sic=="6020"		
replace Catcode="F1400" if sic=="6172"
replace Catcode="F1400" if sic=="6199"		
replace Catcode="F2000" if sic=="6200"
replace Catcode="F4000" if sic=="6500"
replace Catcode="F4500" if sic=="6510"		
replace Catcode="F4200" if sic=="6532"
replace Catcode="F2600" if sic=="6770"		
replace Catcode="F2200" if sic=="6795"
replace Catcode="G5210" if sic=="7310"
replace Catcode="F5200" if sic=="7320"		
replace Catcode="G5000" if sic=="7330"
replace Catcode="G5200" if sic=="7340" & CompanyID==6783367769		
replace Catcode="G5700" if sic=="7340" & CompanyID==3466910281		
replace Catcode="G5200" if sic=="7340" & CompanyID==205378456		
replace Catcode="G5200" if sic=="7340" & CompanyID==16003691		
replace Catcode="G5200" if sic=="7340" & CompanyID==285084273		
replace Catcode="G5200" if sic=="7340" & CompanyID==3599211212		
replace Catcode="G5200" if sic=="7340" & CompanyID==9966228730		
replace Catcode="G5200" if sic=="7340" & CompanyID==21521063434		
replace Catcode="G5300" if sic=="7350"
replace Catcode="C5130" if sic=="7370"		
replace Catcode="G5200" if sic=="7380"
replace Catcode="C4600" if sic=="7385"		
replace Catcode="T2000" if sic=="7500"
replace Catcode="T2500" if sic=="7510"		
replace Catcode="C2700" if sic=="7830"
replace Catcode="G6100" if sic=="7900"		
replace Catcode="G6100" if sic=="7990"
replace Catcode="H3000" if sic=="8000"		
replace Catcode="H2200" if sic=="8050"
replace Catcode="H2100" if sic=="8060"		
replace Catcode="H3000" if sic=="8090"
replace Catcode="H5000" if sic=="8200"		
replace Catcode="H6000" if sic=="8300"
replace Catcode="T1200" if sic=="3720"
replace Catcode="C5000" if sic=="3812"
replace Catcode="G4500" if sic=="5000" & CompanyID==195887652
replace Catcode="B5000" if sic=="5000" & CompanyID==203154812661
replace Catcode="G4500" if sic=="5000" & CompanyID==170004934
replace Catcode="C5100" if sic=="5040" & CompanyID==217999447
replace Catcode="M9000" if sic=="5040" & CompanyID==174705523
replace Catcode="M9000" if sic=="5040" & CompanyID==22271184822
replace Catcode="C5100" if sic=="5040" & CompanyID==218779510
replace Catcode="C5100" if sic=="5040" & CompanyID==191757277
replace Catcode="C5000" if sic=="5065" 
replace Catcode="B5000" if sic=="5070" & (CompanyID==195631912128 | CompanyID==50342410612 | CompanyID==213609117)
replace Catcode="B3400" if sic=="5070" & (CompanyID==116027072 | CompanyID==422547002 | CompanyID==2497011635 | CompanyID==19753344)
replace Catcode="M5100" if sic=="5070" & CompanyID==6700556873 
replace Catcode="D2000" if sic=="5080" & CompanyID==13873753
replace Catcode="M2300" if sic=="5080" & (CompanyID==1473811375 | CompanyID==48157695 | CompanyID==3792812476 | CompanyID==4551705900 | CompanyID==1577837854 | CompanyID==60585804 | CompanyID==221809731)
replace Catcode="M5100" if sic=="5080" & CompanyID==178080810807
replace Catcode="G5500" if sic=="5080" & CompanyID==1458110373
replace Catcode="T1300" if sic=="5080" & (CompanyID==4553485927 | CompanyID==22522745214 | CompanyID==281931)
replace Catcode="B3600" if sic=="5080" & CompanyID==25352048516
replace Catcode="M2300" if sic=="5090" & CompanyID==3849512823
replace Catcode="C4600" if sic=="5090" & CompanyID==423967141
replace Catcode="B5000" if sic=="5090" & CompanyID==3563610967
replace Catcode="H4100" if sic=="5090" & CompanyID==2652612604
replace Catcode="A1500" if sic=="5150" & (CompanyID==45125198 | CompanyID==7740566979)
replace Catcode="A1300" if sic=="5150" & CompanyID==407875319
replace Catcode="G2500" if sic=="5190" & (CompanyID==6399591263 | CompanyID==381810959)
replace Catcode="C1100" if sic=="5190" & CompanyID==151891426
replace Catcode="M7000" if sic=="5190" & (CompanyID==158523180 | CompanyID==25931189062)
replace Catcode="A1300" if sic=="5190" & (CompanyID==141346908 | CompanyID==3792712476)
replace Catcode="A4100" if sic=="5190" & CompanyID==59970010026
replace Catcode="G4600" if sic=="5200"
replace Catcode="E1170" if sic=="5500" & (CompanyID==314547673 | CompanyID==51826311734 | CompanyID==10068619652 | CompanyID==204793612774 | CompanyID==202011812583 | CompanyID==52138411958)
replace Catcode="T2300" if sic=="5500" & (CompanyID==1237763298  | CompanyID==529410653 | CompanyID==410395637 | CompanyID==3489510448 | CompanyID==3702711899 | CompanyID==6468333411 | CompanyID==59494656 | CompanyID==198407875 | CompanyID==2588512208 | CompanyID==272946210256 | CompanyID==26187359292)
replace Catcode="T8300" if sic=="5500" & (CompanyID==2667712697 | CompanyID==424457198)
replace Catcode="E1140" if sic=="5500" & CompanyID==8807607411
replace Catcode="G4500" if sic=="5500" & CompanyID==83934612562
replace Catcode="E1180" if sic=="5500" & CompanyID==15873565
replace Catcode="F1400" if sic=="5500" & CompanyID==7752837118
replace Catcode="M2300" if sic=="5500" & CompanyID==12657115679
replace Catcode="C5110" if sic=="6794" & (CompanyID==337559599 | CompanyID==867407072)      
replace Catcode="G2900" if sic=="6794" & (CompanyID==217889438 | CompanyID==25793834)
replace Catcode="T9100" if sic=="6794" & CompanyID==1084610991
replace Catcode="M3500" if sic=="6794" & CompanyID==297985981
replace Catcode="C2800" if sic=="6794" & CompanyID==1181931
replace Catcode="E1160" if sic=="6794" & CompanyID==13071486231
replace Catcode="H4100" if sic=="6794" & CompanyID==3827512690
replace Catcode="C5110" if sic=="6794" & CompanyID==280133434
replace Catcode="C5000" if sic=="6794" & CompanyID==2257810018
replace Catcode="M3200" if sic=="6794" & CompanyID==942010323
replace Catcode="M3100" if sic=="6794" & (CompanyID==205741012837 | CompanyID==106669845)
replace Catcode="C4100" if sic=="6794" & CompanyID==160573543
replace Catcode="M9000" if sic=="6794" & CompanyID==218519492
replace Catcode="G5200" if sic=="6794" & CompanyID==3733212098
replace Catcode="C2400" if sic=="6794" & CompanyID==7937308968
replace Catcode="G4600" if sic=="6794" & CompanyID==198927917
replace Catcode="G2500" if sic=="6794" & CompanyID==323928506
replace Catcode="C5110" if sic=="6794" & CompanyID==1480711794
replace Catcode="G2900" if sic=="6794" & CompanyID==297925967
replace Catcode="C2800" if sic=="6794" & CompanyID==2278910171
replace Catcode="M1000" if sic=="6794" & CompanyID==161423691
replace Catcode="" if sic=="6797" /* invalid sic code */
replace Catcode="G5000" if sic=="7200" & CompanyID==423627106
replace Catcode="G5240" if sic=="7200" & CompanyID==125933691
replace Catcode="G5100" if sic=="7200" & CompanyID==340499828
replace Catcode="F5300" if sic=="7200" & (CompanyID==802310026 | CompanyID==49985410331 | CompanyID==189792011704)
replace Catcode="M3100" if sic=="7200" & (CompanyID==1102712054 | CompanyID==183396459)
replace Catcode="H1700" if sic=="7200" & CompanyID==4736137990
replace Catcode="G5270" if sic=="7200" & CompanyID==422567002
replace Catcode="G5400" if sic=="7200" & (CompanyID==3778712390 | CompanyID==973312164 | CompanyID==3598711212 | CompanyID==1229263108)
replace Catcode="H4100" if sic=="7200" & CompanyID==3746412179
replace Catcode="G5500" if sic=="7200" & CompanyID==2631912476
replace Catcode="F1000" if sic=="7200" & CompanyID==402524572
replace Catcode="G5500" if sic=="7200" & CompanyID==45535595
replace Catcode="G5210" if sic=="8700" & (CompanyID==49568643 | CompanyID==12594215595 | CompanyID==283185711071)
replace Catcode="C5130" if sic=="8700" & CompanyID==5830011007
replace Catcode="G5280" if sic=="8700" & (CompanyID==178495941 | CompanyID==175621910612 | CompanyID==197647812)
replace Catcode="G5270" if sic=="8700" & (CompanyID==185386669 | CompanyID==203568307)
replace Catcode="G5250" if sic=="8700" & CompanyID==9727536166
replace Catcode="B1000" if sic=="8700" & CompanyID==16822097
replace Catcode="G5200" if sic=="8700" & (CompanyID==292715348 | CompanyID==174925210555 | CompanyID==308727118 | CompanyID==173105333)
replace Catcode="C5130" if sic=="8700" & CompanyID==21310622958
replace Catcode="E1150" if sic=="8700" & CompanyID==12594865595
replace Catcode="D9000" if sic=="8744" & (CompanyID==22260794805 | CompanyID==8688636008)
replace Catcode="G7000" if sic=="8744" & (CompanyID==1235712274  | CompanyID==1241212583)
save "temp_matched_2.dta", replace

* Append two datasets together
use "temp_matched_1.dta", clear
gen step_matched=1
append using "temp_matched_2.dta"
replace step_matched=2 if Catcode!="" & step_matched==.

/*

step_matche |
          d |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    726,777       50.23       50.23
          2 |    324,935       22.46       72.68
          . |    395,286       27.32      100.00
------------+-----------------------------------
      Total |  1,446,998      100.00

* Only 27% of observations have still missing Catcode. For these we use Sector classification in Boardex
*/
drop step_matched

* Match Boardex Sector to CRP Sector
replace Catcode="D0000" if Catcode=="" & Sector=="Aerospace & Defence"
replace Catcode="T2000" if Catcode=="" & Sector=="Automobiles & Parts"
replace Catcode="F1000" if Catcode=="" & Sector=="Banks"
replace Catcode="G2850" if Catcode=="" & Sector=="Beverages" & (CompanyID==203269012668 | CompanyID==11166623108 | CompanyID==50232910522)
replace Catcode="G2810" if Catcode=="" & Sector=="Beverages" & (CompanyID==11299273389 | CompanyID==12655495679 | CompanyID==25745338892)
replace Catcode="G2800" if Catcode=="" & Sector=="Beverages" & (CompanyID==50102310423 | CompanyID==13062656218 | CompanyID==12755945818 | CompanyID==12698315735)
replace Catcode="G2600" if Catcode=="" & Sector=="Beverages" & CompanyID==12594165595
replace Catcode="T1000" if Catcode=="" & Sector=="Beverages" & CompanyID==26115809228
replace Catcode="F2600" if Catcode=="" & Sector=="Blank Check / Shell Companies"
replace Catcode="G5200" if Catcode=="" & Sector=="Business Services"
replace Catcode="M1000" if Catcode=="" & Sector=="Chemicals"
replace Catcode="M3000" if Catcode=="" & Sector=="Clothing, Leisure and Personal Products"
replace Catcode="G5000" if Catcode=="" & Sector=="Consumer Services"
replace Catcode="M7000" if Catcode=="" & Sector=="Containers & Packaging"
replace Catcode="H5000" if Catcode=="" & Sector=="Education"
replace Catcode="E1600" if Catcode=="" & Sector=="Electricity"
replace Catcode="M2300" if Catcode=="" & Sector=="Electronic & Electrical Equipment"
replace Catcode="B4000" if Catcode=="" & Sector=="Engineering & Machinery"
replace Catcode="G2000" if Catcode=="" & Sector=="Food Producers & Processors"
replace Catcode="A5000" if Catcode=="" & Sector=="Forestry & Paper"
replace Catcode="G4000" if Catcode=="" & Sector=="General Retailers"
replace Catcode="H0000" if Catcode=="" & Sector=="Health"
replace Catcode="M4000" if Catcode=="" & Sector=="Household Products"
replace Catcode="C5110" if Catcode=="" & Sector=="Information Technology Hardware"
replace Catcode="F3000" if Catcode=="" & Sector=="Insurance"
replace Catcode="F2100" if Catcode=="" & Sector=="Investment Companies"
replace Catcode="K0000" if Catcode=="" & Sector=="Legal"
replace Catcode="T9000" if Catcode=="" & Sector=="Leisure & Hotels"
replace Catcode="F3300" if Catcode=="" & Sector=="Life Assurance"
replace Catcode="C2000" if Catcode=="" & Sector=="Media & Entertainment"
replace Catcode="E1200" if Catcode=="" & Sector=="Mining"
replace Catcode="E1100" if Catcode=="" & Sector=="Oil & Gas"
replace Catcode="H4300" if Catcode=="" & Sector=="Pharmaceuticals and Biotechnology"
replace Catcode="F2600" if Catcode=="" & Sector=="Private Equity"
replace Catcode="C1000" if Catcode=="" & Sector=="Publishing"
replace Catcode="F4000" if Catcode=="" & Sector=="Real Estate"
replace Catcode="E1500" if Catcode=="" & Sector=="Renewable Energy"
replace Catcode="C5120" if Catcode=="" & Sector=="Software & Computer Services"
replace Catcode="F0000" if Catcode=="" & Sector=="Sovereign Wealth Fund"
replace Catcode="F0000" if Catcode=="" & Sector=="Speciality & Other Finance"
replace Catcode="M2100" if Catcode=="" & Sector=="Steel & Other Metals"
replace Catcode="C4000" if Catcode=="" & Sector=="Telecommunication Services"
replace Catcode="A1300" if Catcode=="" & Sector=="Tobacco"
replace Catcode="T0000" if Catcode=="" & Sector=="Transport"
replace Catcode="G3000" if Catcode=="" & Sector=="Wholesale Trade"
replace Catcode="M3100" if Catcode=="" & Sector=="Clothing & Personal Products"
replace Catcode="C6100" if Catcode=="" & Sector=="Leisure Goods" & (CompanyID==273332310281 | CompanyID==25578918739 | CompanyID==21457983298 | CompanyID==176710610694) 
replace Catcode="M2300" if Catcode=="" & Sector=="Leisure Goods" & CompanyID==20920571708
replace Catcode="M3600" if Catcode=="" & Sector=="Leisure Goods" & (CompanyID==8691316047 | CompanyID==52175111988)
replace Catcode="G4200" if Catcode=="" & Sector=="Leisure Goods" & (CompanyID==12794685873 | CompanyID==21267532854)
replace Catcode="M3500" if Catcode=="" & Sector=="Leisure Goods" & CompanyID==204568712760
replace Catcode="M9200" if Catcode=="" & Sector=="Leisure Goods" & CompanyID==187299111514
replace Catcode="G4100" if Catcode=="" & Sector=="Leisure Goods" & CompanyID==29722210775
replace Catcode="M4100" if Catcode=="" & Sector=="Leisure Goods" & CompanyID==26099719219
replace Catcode="C5000" if Catcode=="" & Sector=="Leisure Goods" & CompanyID==3746712186


* For the following categories (which are too general and span multiple CRP industries), look online and in the CRP dataset if the company appears there (to look at the industry in which the company appears in the lobbying data)
replace Catcode="A0000" if Catcode=="" & Sector=="Diversified Industrials" & (CompanyID==7017411559 | CompanyID==11252973298)
replace Catcode="M0000" if Catcode=="" & Sector=="Diversified Industrials" & (CompanyID==2777969182 | CompanyID==186896813)
replace Catcode="M3000" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==2866329932 
replace Catcode="H5000" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==13015086153 
replace Catcode="E0000" if Catcode=="" & Sector=="Diversified Industrials" & (CompanyID==5363541141 | CompanyID==6869298614)
replace Catcode="M2300" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==2431811212 
replace Catcode="A4000" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==7740566979 
replace Catcode="E1170" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==10068619652 
replace Catcode="C2000" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==2678112753 
replace Catcode="G6550" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==179071010880  
replace Catcode="M3100" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==22388765013 
replace Catcode="F4100" if Catcode=="" & Sector=="Diversified Industrials" & (CompanyID==24941538114 | CompanyID==24070927186)
replace Catcode="G5200" if Catcode=="" & Sector=="Diversified Industrials" & CompanyID==24996198165 

replace Catcode="B1000" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==15355668691 | CompanyID==14161187488 | CompanyID==173964310473 | CompanyID==12665155693 | CompanyID==51696511643 | CompanyID==15393778730 | CompanyID==8007010767 ///
	| CompanyID==175252510580 | CompanyID==15374018711 | CompanyID==15369598711 | CompanyID==15354508691 | CompanyID==15393508730 | CompanyID==14285807630 | CompanyID==15371758711 | CompanyID==22797405609)
replace Catcode="B6000" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==15438388768 | CompanyID==24599767759 | CompanyID==16694099889)
replace Catcode="B0000" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==7909528711 | CompanyID==25118798287 | CompanyID==186754911475 | CompanyID==63482112661 | CompanyID==23654406705)
replace Catcode="B4000" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==15360258701 | CompanyID==951363671 | CompanyID==15377908711 | CompanyID==15367428701 | CompanyID==15436678768 | CompanyID==9830247367 | CompanyID==15392238730 ///
	| CompanyID==90972210120 | CompanyID==15405028739 | CompanyID==12929366047 | CompanyID==168666110035 | CompanyID==15360488701 | CompanyID==13521386778 | CompanyID==13521886778 | CompanyID==15376988711 | CompanyID==15365338701 | CompanyID==15391978730 ///
	| CompanyID==15360788701 | CompanyID==15432808768 | CompanyID==15360558701 | CompanyID==50192010489 | CompanyID==196603412201 | CompanyID==1399275873 | CompanyID==15390088730 | CompanyID==10659281477 | CompanyID==15397148730 | CompanyID==24929778104 | CompanyID==23608026657 | CompanyID==194420812039 | CompanyID==16282729536 | CompanyID==11177353132 | CompanyID==201968912576 | CompanyID==24193547322 | CompanyID==280758210880 | CompanyID==9007479347)     
replace Catcode="B3400" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==15374158711 | CompanyID==15394848730)
replace Catcode="B5000" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==10716071751 | CompanyID==195602912128 | CompanyID==195602912128 | CompanyID==12726965777 | CompanyID==15434428768 | CompanyID==50077910398 | CompanyID==11995194690 ///
	| CompanyID==190997209 | CompanyID==169960010146 | CompanyID==11233113251 | CompanyID==10641781374 | CompanyID==5381471951 | CompanyID==11971734656 | CompanyID==170004934 | CompanyID==24086107209 | CompanyID==4561536021 | CompanyID==271354710120)
replace Catcode="B3000" if Catcode=="" & Sector=="Construction & Building Materials" & (CompanyID==15372398711 | CompanyID==15362528701 | CompanyID==51779111704)
replace Catcode="E1500" if Catcode=="" & Sector=="Construction & Building Materials" & CompanyID==16822097
replace Catcode="D0000" if Catcode=="" & Sector=="Construction & Building Materials" & CompanyID==2843829749
replace Catcode="M7200" if Catcode=="" & Sector=="Construction & Building Materials" & CompanyID==13035486179
replace Catcode="M1500" if Catcode=="" & Sector=="Construction & Building Materials" & CompanyID==26055119173
replace Catcode="E1240" if Catcode=="" & Sector=="Construction & Building Materials" & CompanyID==26547739617

replace Catcode="G2840" if Catcode=="" & Sector=="Food & Drug Retailers" & CompanyID==186749711475
replace Catcode="G2400" if Catcode=="" & Sector=="Food & Drug Retailers" & (CompanyID== 52484312208 | CompanyID==15408438739 | CompanyID==11988144690 | CompanyID==11971324656 | CompanyID==12616635623 | CompanyID==12576055566 | CompanyID==314547673 | CompanyID==22498635183)
replace Catcode="G2500" if Catcode=="" & Sector=="Food & Drug Retailers" & (CompanyID==22164074656 | CompanyID==6399591263 | CompanyID==179279410904)
replace Catcode="G4900" if Catcode=="" & Sector=="Food & Drug Retailers" & CompanyID==24141006
replace Catcode="G2100" if Catcode=="" & Sector=="Food & Drug Retailers" & CompanyID==21358923059
replace Catcode="G5270" if Catcode=="" & Sector=="Food & Drug Retailers" & CompanyID==281954710975

replace Catcode="E1610" if Catcode=="" & Sector=="Utilities - Other" & CompanyID==12821585900
replace Catcode="E1600" if Catcode=="" & Sector=="Utilities - Other" & (CompanyID==24850198021 | CompanyID==12460335407 | CompanyID==15370888711 | CompanyID==22256584805 | CompanyID==24955098124 | CompanyID==2689068367 | CompanyID==15410838749)
replace Catcode="E1700" if Catcode=="" & Sector=="Utilities - Other" & CompanyID==14492127844
replace Catcode="E3000" if Catcode=="" & Sector=="Utilities - Other" & (CompanyID==7831937959 | CompanyID==15401988739)
replace Catcode="E1100" if Catcode=="" & Sector=="Utilities - Other" & CompanyID==70286310026
replace Catcode="E2000" if Catcode=="" & Sector=="Utilities - Other" & (CompanyID==52539412245 | CompanyID==168271510009 | CompanyID==3650511559 | CompanyID==178470610839 | CompanyID==13366366596)
replace Catcode="E5000" if Catcode=="" & Sector=="Utilities - Other" & (CompanyID==22917575777 | CompanyID==5695007175 | CompanyID==11248823298 | CompanyID==24888478063 | CompanyID==173124810406)
replace Catcode="C5000" if Catcode=="" & Sector=="Utilities - Other" & CompanyID==82484611521

/*
tab Sector if Catcode =="" 

                           Sector |      Freq.     Percent        Cum.
----------------------------------+-----------------------------------
                       Government |        694       14.44       14.44
                       Regulators |        556       11.57       26.01
                Trade Association |      3,556       73.99      100.00
----------------------------------+-----------------------------------
                            Total |      4,806      100.00



Only few missing sectors are government, regulators, and trade associations, which we can later drop								  
*/

drop company_name_compustat Company_Name_SECfilings SICname

* Merge to CRP full sector classification
preserve
use "${dir}/Data/Lobbying CRP/CRP_Categories.dta", clear
keep catcode catname catorder industry sector
rename catcode Catcode
save "temp.dta", replace
restore
merge m:1 Catcode using "temp.dta"
drop if _m==2
drop _m

erase "temp_matched_1.dta"
erase "temp_matched_2.dta"
save "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC_CRPsector.dta", replace	




		*********************************************************************************************
		*********************************************************************************************
		*** 	STEP 13:  Combine lobbying, committee assignment, and committee-issue matching    ***
		***				  to create a dataset at the industry-cycle-MC level					  ***
		*********************************************************************************************
		*********************************************************************************************

		
*************************************************************	
***** 2 DATASETS WITHOUT USING SUBCOMMITTEE ASSIGNMENTS	*****	
*************************************************************		
		
		
		* Note: the dataset is actually at the MC-chamber-cycle level (not MC-cycle) since a handful of MC serve
		* in both house and senate in same cycle

* Reshape committee assignment data
use "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", clear
reshape long committee_ chairman_ ranking_minority_, i(icpsr2 cycle chamber ) j(committee_code)
keep if committee_!=.
drop committee_code
rename committee_ committee
rename chairman_ chairman
rename ranking_minority_ ranking_minority
drop majority majority_leader majority_whip minority_leader minority_whip speaker_house
* change code of a house select committee with a typo (it should be 254, not 299)
replace committee=254 if committee==299
save "temp_1.dta", replace

* Reshape committee-issue matching
import excel using "${dir}/Data/Federal Committee Assignmnents/Committee_ISSUE_CRP_Mapping.xlsx", sheet("Committees") first clear	
forvalues i=1(1)28 {
replace issue_`i'=ltrim(issue_`i')
replace issue_`i'=rtrim(issue_`i')
replace issue_`i'=itrim(issue_`i')
}
duplicates drop Committee_code, force /* some committees change name over time, and appear multiple times */
reshape long issue_, i(Committee_code) j(temp)
drop temp
rename issue_ issueID
drop if issueID=="" 
rename Committee_code committee
save "temp_2.dta", replace

*** For each definition of "top lobbying issues" of the industry, create the final dataset at the industry-cycle-MC level ***

* Combine MCs to issues 
use "temp_1.dta", clear
joinby committee using "temp_2.dta", unmatched(both)
* one committee to be dropped: joint select committee on deficit reduction (existed for only 4 months)
keep if _m==3
drop _m
* Now, a few MCs seat on multiple committees which have issues in common, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority, by(icpsr2 cycle chamber issueID)

* Combine issues to industries, based on the 2 different definitions of most lobbied industries

		* Definition 1
preserve
joinby issueID cycle using "${dir}/Data/Lobbying CRP/top_issues_definition_1.dta", unmatched(both)
 tab _m
 bys icpsr2 cycle : egen max=max(_m)
 tab max

/*


                       _merge |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
          only in master data |     40,304       31.87       31.87
both in master and using data |     86,146       68.13      100.00
------------------------------+-----------------------------------
                        Total |    126,450      100.00


Many issues never appear as the top lobbied ones by an industry in a cycle


        max |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |        120        0.09        0.09
          3 |    126,330       99.91      100.00
------------+-----------------------------------
      Total |    126,450      100.00

						
However, virtually all MCs in each cycle are in a committee dealing with an issue that is of interest to at least one industry
						
*/
drop if _m==1
drop _m

* Now, a few MCs deal with issues of interest to the same industry, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority (first) catorder, by(icpsr2 cycle chamber industry)
 
save "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_1.dta", replace
restore


		* Definition 2
preserve
joinby issueID using "${dir}/Data/Lobbying CRP/top_issues_definition_2.dta", unmatched(both)
 tab _m
 bys icpsr2 cycle : egen max=max(_m)
 tab max
/*

                       _merge |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
          only in master data |     20,434        7.13        7.13
both in master and using data |    266,330       92.87      100.00
------------------------------+-----------------------------------
                        Total |    286,764      100.00


Allowing 3 issues for each industry we get that most issues appear matched to at least one industry

        max |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |          5        0.00        0.00
          3 |    286,759      100.00      100.00
------------+-----------------------------------
      Total |    286,764      100.00

						
Virtually all MCs in each cycle are in a committee dealing with an issue that is of interest to at least one industry
						
*/
drop if _m==1
drop _m

* Now, a few MCs deal with issues of interest to the same industry, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority (first) catorder, by(icpsr2 cycle chamber industry)
 
save "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_2.dta", replace
restore

erase "temp_1.dta"
erase "temp_2.dta"



*****************************************************
***** 2 DATASETS USING SUBCOMMITTEE ASSIGNMENTS	*****	
*****************************************************	


		
		* Note: the dataset is actually at the MC-chamber-cycle level (not MC-cycle) since a handful of MC serve
		* in both house and senate in same cycle

* Reshape committee assignment data
use "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", clear
reshape long committee_ chairman_ ranking_minority_, i(icpsr2 cycle chamber ) j(committee_code)
keep if committee_!=.
drop committee_code
rename committee_ committee
rename chairman_ chairman
rename ranking_minority_ ranking_minority
drop majority majority_leader majority_whip minority_leader minority_whip speaker_house
* change code of a house select committee with a typo (it should be 254, not 299)
replace committee=254 if committee==299

* DROP APPROPRIATIONS AND COMMERCE COMMITTEES: FOR THESE COMMITTEES WE USE THE SUBCOMMITTEES
drop if committee==104 | committee==128 | committee==306 | committee==321

drop name
tostring committee, replace

save "temp_1.dta", replace

* Reshape SUBcommittee assignment data
use "${dir}/Data/Federal Committee Assignmnents/SUBcommittee_assignments_final.dta", clear
reshape long Committee_Code_ chairman_ ranking_minority_, i(icpsr2 cycle chamber ) j(committee_code)
keep if Committee_Code_!=""
drop committee_code
rename Committee_Code_ committee
rename chairman_ chairman
rename ranking_minority_ ranking_minority
replace committee=committee

save "temp_1_SUB.dta", replace

* Reshape committee-issue matching
import excel using "${dir}/Data/Federal Committee Assignmnents/Committee_ISSUE_CRP_Mapping.xlsx", sheet("Committees") first clear	
forvalues i=1(1)28 {
replace issue_`i'=ltrim(issue_`i')
replace issue_`i'=rtrim(issue_`i')
replace issue_`i'=itrim(issue_`i')
}
duplicates drop Committee_code, force /* some committees change name over time, and appear multiple times */
reshape long issue_, i(Committee_code) j(temp)
drop temp
rename issue_ issueID
drop if issueID=="" 
rename Committee_code committee

* DROP APPROPRIATIONS AND COMMERCE COMMITTEES: FOR THESE COMMITTEES WE USE THE SUBCOMMITTEES
drop if committee==104 | committee==128 | committee==306 | committee==321

tostring committee, replace

save "temp_2.dta", replace

* Reshape SUBcommittee-issue matching
forvalues x=106(1)115 {
import excel using "${dir}/Data/Federal Committee Assignmnents/Committee_ISSUE_CRP_Mapping.xlsx", sheet("House `x'") first clear	
drop if Committee_Code==""
forvalues i=1(1)43 {
cap tostring issue_`i', replace
replace issue_`i'=ltrim(issue_`i')
replace issue_`i'=rtrim(issue_`i')
replace issue_`i'=itrim(issue_`i')
replace issue_`i'="" if issue_`i'=="."
}
reshape long issue_, i(Committee_Code) j(temp)
drop temp
rename issue_ issueID
drop if issueID=="" 
rename Committee_Code committee
replace committee=committee+"_"+"`x'"
rename Committee_Subcommittee Chamber_Committee
replace Chamber_Committee="House,"+Chamber_Committee

save "temp_2_SUB_H`x'.dta", replace
}
forvalues x=106(1)115 {
import excel using "${dir}/Data/Federal Committee Assignmnents/Committee_ISSUE_CRP_Mapping.xlsx", sheet("Senate `x'") first clear	
drop if Committee_Code==""
forvalues i=1(1)43 {
cap tostring issue_`i', replace
replace issue_`i'=ltrim(issue_`i')
replace issue_`i'=rtrim(issue_`i')
replace issue_`i'=itrim(issue_`i')
replace issue_`i'="" if issue_`i'=="."
}
reshape long issue_, i(Committee_Code) j(temp)
drop temp
rename issue_ issueID
drop if issueID=="" 
rename Committee_Code committee
replace committee=committee+"_"+"`x'"
rename Committee_Subcommittee Chamber_Committee
replace Chamber_Committee="Senate,"+Chamber_Committee

save "temp_2_SUB_S`x'.dta", replace

}
use "temp_2_SUB_H106.dta"
append using "temp_2_SUB_S106.dta"
forvalues x=107(1)115 {
append using "temp_2_SUB_H`x'.dta"
append using "temp_2_SUB_S`x'.dta"
}
save "temp_2_SUB.dta", replace

forvalues x=106(1)115 {
erase "temp_2_SUB_H`x'.dta"
erase "temp_2_SUB_S`x'.dta"
}

*** For each definition of "top lobbying issues" of the industry, create the final dataset at the industry-cycle-MC level ***

* Combine MCs to issues in committees
use "temp_1.dta", clear
joinby committee using "temp_2.dta", unmatched(both)
* one committee to be dropped: joint select committee on deficit reduction (existed for only 4 months)
keep if _m==3
drop _m
save "temp.dta", replace
* Combine MCs to issues in SUBcommittees
use "temp_1_SUB.dta", clear
joinby committee using "temp_2_SUB.dta", unmatched(both)
keep if _m==3
drop _m
append using "temp.dta"
erase "temp.dta"
erase "temp_1.dta"
erase "temp_2.dta"
erase "temp_1_SUB.dta"
erase "temp_2_SUB.dta"

* Now, a few MCs seat on multiple committees which have issues in common, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority, by(icpsr2 cycle chamber issueID)

* Combine issues to industries, based on the 2 different definitions of most lobbied industries


		* Definition 1
preserve
joinby issueID cycle using "${dir}/Data/Lobbying CRP/top_issues_definition_1.dta", unmatched(both)
 tab _m
 bys icpsr2 cycle : egen max=max(_m)
 tab max

/*


                       _merge |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
          only in master data |     47,974       32.46       32.46
both in master and using data |     99,803       67.54      100.00
------------------------------+-----------------------------------
                        Total |    147,777      100.00



Many issues never appear as the top lobbied ones by an industry in a cycle


        max |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |        127        0.09        0.09
          3 |    147,650       99.91      100.00
------------+-----------------------------------
      Total |    147,777      100.00


						
However, virtually all MCs in each cycle are in a committee dealing with an issue that is of interest to at least one industry
						
*/
drop if _m==1
drop _m

* Now, a few MCs deal with issues of interest to the same industry, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority (first) catorder, by(icpsr2 cycle chamber industry)
 
save "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_1_SUB.dta", replace
restore

	

		* Definition 2
preserve
joinby issueID using "${dir}/Data/Lobbying CRP/top_issues_definition_2.dta", unmatched(both)
 tab _m
 bys icpsr2 cycle : egen max=max(_m)
 tab max
/*


                       _merge |      Freq.     Percent        Cum.
------------------------------+-----------------------------------
          only in master data |     24,281        7.59        7.59
both in master and using data |    295,643       92.41      100.00
------------------------------+-----------------------------------
                        Total |    319,924      100.00




Allowing 3 issues for each industry we get that most issues appear matched to at least one industry


        max |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |          8        0.00        0.00
          3 |    319,916      100.00      100.00
------------+-----------------------------------
      Total |    319,924      100.00

				
Virtually all MCs in each cycle are in a committee dealing with an issue that is of interest to at least one industry
						
*/
drop if _m==1
drop _m

* Now, a few MCs deal with issues of interest to the same industry, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority (first) catorder, by(icpsr2 cycle chamber industry)
 
save "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_2_SUB.dta", replace
restore




************************************************************
***** Definition of Ovtchinnikov and Pantaleoni (2012) *****
************************************************************

* Reshape committee assignment data
use "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", clear
reshape long committee_ chairman_ ranking_minority_, i(icpsr2 cycle chamber ) j(committee_code)
keep if committee_!=.
drop committee_code
rename committee_ committee
rename chairman_ chairman
rename ranking_minority_ ranking_minority
drop majority majority_leader majority_whip minority_leader minority_whip speaker_house
* change code of a house select committee with a typo (it should be 254, not 299)
replace committee=254 if committee==299
save "temp_1.dta", replace

* comine MCs to industries in Ovtchinnikov and Pantaleoni (2012)
import excel using "${dir}/Data/Federal Committee Assignmnents/Committee_Assignment_ovtchinnikov_and_pantaleoni_2012.xlsx", first clear	
keep catorder Housecode* Senatecode*
preserve
keep catorder Housecode*
reshape long Housecode, i(catorder) j(n)
drop if Housecode==.
rename Housecode committee
gen chamber="H"
save "temp_OP2012_H.dta", replace
restore
preserve
keep catorder Senatecode*
reshape long Senatecode, i(catorder) j(n)
drop if Senatecode==.
rename Senatecode committee
gen chamber="S"
save "temp_OP2012_S.dta", replace
restore
use "temp_OP2012_H.dta", clear
append using "temp_OP2012_S.dta"
drop n
save "temp_OP2012.dta"
erase "temp_OP2012_H.dta"
erase "temp_OP2012_S.dta"

use "temp_1.dta", clear
joinby committee chamber using "temp_OP2012.dta", unmatched(both)
keep if _m==3
drop _m
* Now, a few MCs seat on multiple committees which have industries in common, we can collapse these
* If they are chairman or ranking members of one of these committees, then we keep this information
collapse (max) chairman ranking_minority, by(icpsr2 cycle chamber catorder)
save "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_ovtchinnikov_and_pantaleoni_2012.dta", replace

erase "temp_1.dta"
erase "temp_OP2012.dta"

		***********************************************************************************************
		***********************************************************************************************
		*** 				STEP 14:  Merge information on whether individual is    				***
		***						executive director or other top management									***
		***********************************************************************************************
		***********************************************************************************************


*** Create dataset with only executive directors and other top management and merge it to the panel ***
forvalues i=1(1)5 {
preserve
use "${dir}/Data/Boardex/SMDEs Board Summary/SMDEs_Summary_`i'.dta", clear
keep AnnualReportYear Country CompanyID DirectorID IndividualRole
keep if Country=="United States"
drop Country
save "temp_`i'.dta", replace
restore
}
use "temp_1.dta", clear
append using "temp_2.dta"
append using "temp_3.dta"
append using "temp_4.dta"
append using "temp_5.dta"
duplicates drop AnnualReportYear CompanyID DirectorID IndividualRole, force

forvalues i=1(1)5 {
erase "temp_`i'.dta"
}

* Top management
gen top_mgmt=regexs(0) if regexm(IndividualRole,"(Chief)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(Executive)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(CFO)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(CEO)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(COO)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(CIO)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(CMO)") 
replace top_mgmt=regexs(0) if regexm(IndividualRole,"(Executive)") 
keep if top_mgmt!=""
drop top_mgmt

* Executive directors
forvalues i=1(1)3 {
preserve
use "${dir}/Data/Boardex/Board Summary/Board_Summary_`i'.dta", clear
keep AnnualReportYear Country CompanyID DirectorID DirectorTypeEDorSD
keep if Country=="United States"
drop Country
duplicates drop AnnualReportYear CompanyID DirectorID DirectorTypeEDorSD, force
keep if DirectorTypeEDorSD =="ED"
drop DirectorTypeEDorSD
duplicates drop AnnualReportYear CompanyID DirectorID, force
save "temp_`i'.dta", replace
restore
}
append using "temp_1.dta"
append using "temp_2.dta"
append using "temp_3.dta"

forvalues i=1(1)3 {
erase "temp_`i'.dta"
}


* Create year and month of Annual Report
gen year=substr(AnnualReportYear,-2,.)
replace year="20" if year=="nt"
replace year="20"+year if year!="99"
replace year="1999" if year=="99"
gen month=substr(AnnualReportYear,1,3)
replace month="01" if month=="Jan"
replace month="02" if month=="Feb"
replace month="03" if month=="Mar"
replace month="04" if month=="Apr"
replace month="05" if month=="May"
replace month="06" if month=="Jun"
replace month="07" if month=="Jul"
replace month="08" if month=="Aug"
replace month="09" if month=="Sep"
replace month="10" if month=="Oct"
replace month="11" if month=="Nov"
replace month="12" if month=="Dec"
replace month="13" if month=="Cur"
gen year_month=year+month
destring month, replace
destring year, replace 
destring year_month, replace
* Keep only years before 2018 since contributions data end in the 2018 election cycle
keep if year<=2018
* Create variable that gives the lag between a report year and the last one
preserve
duplicates drop AnnualReportYear CompanyID, force
sort CompanyID year month
by CompanyID: gen lag_report=year-year[_n-1]	
sort CompanyID year month
by CompanyID: gen lag_report_2=(lag_report[_n+1]!=. & lag_report[_n+1]>=2)
keep AnnualReportYear CompanyID	 lag_report lag_report_2 year_month
save "temp.dta", replace
restore	
merge m:1 AnnualReportYear CompanyID using "temp.dta"
drop _m
erase "temp.dta"
tab lag_report
* Save dataset with observations with lag=0 or 1
preserve
keep if lag_report==. | lag_report==0 | lag_report==1
save "Data_careers_1.dta", replace
restore
* For observations with lag>1, consider them in the firm in the years in between two years if they appear in the firm in both the years
preserve
keep if lag_report!=. & lag_report!=0 & lag_report!=1
save "Data_careers_2.dta", replace
restore
keep if (lag_report!=. & lag_report>=2) | (lag_report_2==1)
sort CompanyID DirectorID year month
by CompanyID: gen onboard=(DirectorID==DirectorID[_n-1] & year==year[_n-1]+lag_report)
keep if onboard==1
keep if lag_report>=2
drop AnnualReportYear onboard month year_month  lag_report_2
expand lag_report, gen(new)
keep if new==1
bys CompanyID DirectorID year: gen n=_n
gen new_year=year-n
drop year lag_report new n
rename new_year year

* Append datasets together
append using "Data_careers_2.dta"
append using "Data_careers_1.dta"

keep CompanyID DirectorID year

erase "Data_careers_1.dta"
erase "Data_careers_2.dta"
	
* Eliminates cases of firms having multiple annual reports in same year
duplicates drop CompanyID DirectorID year, force

gen cycle=.
replace cycle=2000 if year==1999 | year==2000
replace cycle=2002 if year==2001 | year==2002
replace cycle=2004 if year==2003 | year==2004
replace cycle=2006 if year==2005 | year==2006
replace cycle=2008 if year==2007 | year==2008
replace cycle=2010 if year==2009 | year==2010
replace cycle=2012 if year==2011 | year==2012
replace cycle=2014 if year==2013 | year==2014
replace cycle=2016 if year==2015 | year==2016
replace cycle=2018 if year==2017 | year==2018

duplicates drop CompanyID DirectorID cycle, force
keep CompanyID DirectorID cycle

save "${dir}/Data/Boardex/temp_exec.dta", replace		
		
		
use "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC_CRPsector.dta", clear
merge m:1 CompanyID DirectorID cycle using "${dir}/Data/Boardex/temp_exec.dta"
drop if _m==2
gen top_mgmt=(_m==3)
drop _m	
save "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC_CRPsector.dta", replace


		***********************************************************************************************
		***********************************************************************************************
		*** 				STEP 15:  Merge information on whether company has    				    ***
		***									donated through a PAC									***
		***********************************************************************************************
		***********************************************************************************************

		
*** Standardize company names for DIME ***

insheet using "${dir}/Data/DIME_V3/dime_contributors_1979_2018.csv", clear
keep if contributortype=="C" /* keep only PACs */
keep bonicacid mostrecentcontributorname mostrecentcontributorcity mostrecentcontributorstate mostrecentcontributorzipcode first_cycle_active last_cycle_active
stnd_compname mostrecentcontributorname, gen(Cmostrecentcontributorname_stnd) p(${dir}/Data/Pattern_Files)
gen id_using=_n
rename Cmostrecentcontributorname_stnd CompanyName_stnd
save "${dir}/temp_PACs.dta", replace
		
	
*** Standardize company names for Boardex ***

use "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC_CRPsector.dta", clear

keep CompanyID CompanyName
duplicates drop CompanyID, force

foreach x in CompanyName {
replace `x'=lower(`x')
replace `x'=rtrim(`x')
replace `x'=ltrim(`x')
replace `x'=itrim(`x')
}

gen CompanyName_Orig=CompanyName

* Alternative company names
gen str CompanyName2=regexs(0) if regexm(CompanyName,"(\().+( prior to)")
replace CompanyName2=subinstr(CompanyName2,"prior to","",.)
replace CompanyName2=subinstr(CompanyName2,"(","",.)
replace CompanyName2=subinstr(CompanyName2,")","",.)

gen str CompanyName3=regexs(0) if regexm(CompanyName,"(formerly known as).+(\) \()")
replace CompanyName3=regexs(0) if regexm(CompanyName,"(formerly known as).+(\))")
replace CompanyName3=subinstr(CompanyName3,"(","",.)
replace CompanyName3=subinstr(CompanyName3,")","",.)
replace CompanyName3=subinstr(CompanyName3,"formerly known as","",.)

replace CompanyName=regexr(CompanyName, "[(].+( prior to).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](formerly known as).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](de-listed).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](delisted).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](ceased trading).+[)]"," ")
replace CompanyName=regexr(CompanyName, "[(](listed).+[)]"," ")
replace CompanyName2=regexr(CompanyName2, "[(].+( prior to).+[)]"," ")
replace CompanyName2=regexr(CompanyName2, "[(](formerly known as).+[)]"," ")
replace CompanyName2=regexr(CompanyName2, "[(](de-listed).+[)]"," ")
replace CompanyName2=regexr(CompanyName2, "[(](delisted).+[)]"," ")
replace CompanyName2=regexr(CompanyName2, "[(](ceased trading).+[)]"," ")
replace CompanyName2=regexr(CompanyName2, "[(](listed).+[)]"," ")
replace CompanyName3=regexr(CompanyName3, "[(].+( prior to).+[)]"," ")
replace CompanyName3=regexr(CompanyName3, "[(](formerly known as).+[)]"," ")
replace CompanyName3=regexr(CompanyName3, "[(](de-listed).+[)]"," ")
replace CompanyName3=regexr(CompanyName3, "[(](delisted).+[)]"," ")
replace CompanyName3=regexr(CompanyName3, "[(](ceased trading).+[)]"," ")
replace CompanyName3=regexr(CompanyName3, "[(](listed).+[)]"," ")

gen temp=CompanyName
split temp, parse("(") generate(CompanyName_temp)
drop temp
rename CompanyName_temp1 CompanyName1
rename CompanyName_temp2 CompanyName4
rename CompanyName_temp3 CompanyName5
rename CompanyName company_name
 
forvalues x=1(1)5 {
replace CompanyName`x'=lower(CompanyName`x')
replace CompanyName`x'=rtrim(CompanyName`x')
replace CompanyName`x'=ltrim(CompanyName`x')
replace CompanyName`x'=itrim(CompanyName`x')
replace CompanyName`x'=subinstr(CompanyName`x',"(","",.)
replace CompanyName`x'=subinstr(CompanyName`x',")","",.)
replace CompanyName`x'="" if CompanyName`x'=="the"
replace CompanyName`x'=regexr(CompanyName`x',"^(aka)[ ]"," ")
}

forvalues x=1(1)5 {
replace CompanyName`x'=lower(CompanyName`x')
replace CompanyName`x'=rtrim(CompanyName`x')
replace CompanyName`x'=ltrim(CompanyName`x')
replace CompanyName`x'=itrim(CompanyName`x')
}

gen n=_n
preserve
keep n CompanyName1 CompanyName2 CompanyName3 CompanyName4 CompanyName5
reshape long CompanyName, i(n) j(num)
drop if CompanyName==""
duplicates drop  CompanyName n, force
drop num
bys n: gen num=_n
reshape wide CompanyName, i(n) j(num)
tempfile temp
save "`temp'" , replace
restore

keep CompanyID CompanyName_Orig n
merge 1:1 n using "`temp'"
drop _m
drop n

forvalues i=1(1)4 {
stnd_compname CompanyName`i', gen(CompanyName`i'_stnd) p(${dir}/Data/Pattern_Files)
}

gen id_master=_n

* Fuzzy match of the 2 datasets
forvalues i=1(1)4 {
preserve
rename CompanyName`i'_stnd CompanyName_stnd
reclink CompanyName_stnd using "${dir}/temp_PACs.dta", gen(quality_match) idusing(id_using) idmaster(id_master) 
save "${dir}/Data/Matched Datasets/Matched_PACs_`i'.dta"
restore
}
erase "${dir}/temp_PACs.dta"

forvalues i=1(1)4 {
use "${dir}/Data/Matched Datasets/Matched_PACs_`i'.dta", clear
keep if _m==3
keep CompanyID CompanyName_Orig CompanyName_stnd UCompanyName_stnd id_master quality_match id_using bonicacid mostrecentcontributorname mostrecentcontributorcity mostrecentcontributorzipcode mostrecentcontributorstate first_cycle_active last_cycle_active
tempfile temp`i'
save "`temp`i''" , replace
}
use "`temp1'"
forvalues i=2(1)4 {
append using "`temp`i''"
}
duplicates drop id_master id_using CompanyName_stnd UCompanyName_stnd, force
drop if CompanyName_stnd=="" | UCompanyName_stnd==""
gen matched=1 if quality_match==1
preserve
use "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC_CRPsector.dta", clear /* get information on address for checking matches manually */
duplicates drop CompanyID, force
keep CompanyID HOAddress1 HOAddress2 HOAddress3 HOAddress4 HOAddress5 HOURL
tempfile temp
save "`temp'" , replace
restore
merge m:1 CompanyID using "`temp'"
drop if _m==2
drop _m
gsort -quality_match CompanyID CompanyName_stnd UCompanyName_stnd
order CompanyName_stnd UCompanyName_stnd matched mostrecentcontributorcity mostrecentcontributorstate HOAddress1 HOAddress2 HOAddress3 HOAddress4 HOAddress5 HOURL quality_match
export excel CompanyName_stnd UCompanyName_stnd matched mostrecentcontributorcity mostrecentcontributorstate HOAddress1 HOAddress2 HOAddress3 HOAddress4 HOAddress5 HOURL quality_match id_master id_using CompanyName_Orig mostrecentcontributorname first_cycle_active last_cycle_active using "${dir}/Data/Matched Datasets/Matched_PACs_TOCHECK.xlsx", replace first(var)
* The dataset checked by hand is "${dir}/Data/Matched Datasets/Matched_PACs_CHECKED.dta"
preserve
use "${dir}/Data/Matched Datasets/Matched_PACs_CHECKED.dta", clear
duplicates drop CompanyName_stnd UCompanyName_stnd, force /* keep only one observation for each check done  */
keep CompanyName_stnd UCompanyName_stnd matched
tempfile temp
save "`temp'" , replace
restore
drop matched
merge m:1 CompanyName_stnd UCompanyName_stnd using "`temp'"
* keep for each CompanyID=bonicacid combination the max of matched (since a Boardex company can be matched with a name to a PAC, but not with another name)
bys CompanyID bonicacid: egen max=max(matched)
keep if max==1
duplicates drop bonicacid CompanyID, force
keep bonicacid CompanyID
save "${dir}/Data/Matched Datasets/Company_PACs_MATCHED.dta", replace
format bonicacid %20.0g
duplicates drop bonicacid, force
keep bonicacid
save "${dir}/Data/Matched Datasets/Company_PACs_MATCHED_temp.dta", replace


* Keep from contributions data only the contributions of committees and the variables of interest
forvalues d=1980(2)2014 {
cd "${dir}/Data/DIME_V3"
insheet using contribDB_`d'.csv, clear
keep if contributortype=="C" /* keep only PACs */
keep cycle transactionid transactiontype amount date bonicacid recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype
format bonicacid %20.0g
merge m:1 bonicacid using "${dir}/Data/Matched Datasets/Company_PACs_MATCHED_temp.dta"
keep if _m==3
drop _m
cd "${dir}/Data/Contributions"
save "temp_`d'.dta", replace
}

foreach i in a b c d e f g h i j k {
	cd "${dir}/Data/DIME_V3/2016"
	insheet using "xa`i'.csv", clear
	cap rename (v1-v46) (cycle transactionid transactiontype amount date bonicacid contributorname contributorlname contributorfname contributormname contributorsuffix contributortitle contributorffname contributortype contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer iscorp recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype latitude longitude gisconfidence contributordistrict90s contributordistrict00s contributordistrict10s censustract efecmemo efecmemo2 efectransactionidorig bkreftransactionid efecorgorig efeccomidorig efecformtype excludedfromscaling contributorcfscore candidatecfscore)
	keep if contributortype=="C" /* keep only PACs */
	keep cycle transactionid transactiontype amount date bonicacid recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype
	cap destring cycle, force replace
	cap destring bonicacid, force replace
	format bonicacid %20.0g
	merge m:1 bonicacid using "${dir}/Data/Matched Datasets/Company_PACs_MATCHED_temp.dta"
	keep if _m==3
	drop _m
	cd "${dir}/Data/Contributions"
	save "temp_2016_`i'.dta", replace
	}
foreach i in a b c d e f g h i j k l m n {
	cd "${dir}/Data/DIME_V3/2018"
	insheet using "xa`i'.csv", clear
	cap rename (v1-v46) (cycle transactionid transactiontype amount date bonicacid contributorname contributorlname contributorfname contributormname contributorsuffix contributortitle contributorffname contributortype contributorgender contributoraddress contributorcity contributorstate contributorzipcode contributoroccupation contributoremployer iscorp recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype latitude longitude gisconfidence contributordistrict90s contributordistrict00s contributordistrict10s censustract efecmemo efecmemo2 efectransactionidorig bkreftransactionid efecorgorig efeccomidorig efecformtype excludedfromscaling contributorcfscore candidatecfscore)
	keep if contributortype=="C" /* keep only PACs */
	keep cycle transactionid transactiontype amount date bonicacid recipientname bonicarid recipientparty recipienttype recipientstate seat electiontype
	cap destring cycle, force replace
	cap destring bonicacid, force replace
	format bonicacid %20.0g
	merge m:1 bonicacid using "${dir}/Data/Matched Datasets/Company_PACs_MATCHED_temp.dta"
	keep if _m==3
	drop _m
	cd "${dir}/Data/Contributions"
	save "temp_2018_`i'.dta", replace
	}
	
foreach i in a b c d e f g h i j k {
use "temp_2016_`i'.dta", clear
cap tostring recipientparty, replace
save "temp_2016_`i'.dta", replace
}	

foreach i in a b c d e f g h i j k l m n {
use "temp_2018_`i'.dta", clear
cap tostring recipientparty, replace
save "temp_2018_`i'.dta", replace
}
	
use "temp_1980.dta", clear
forvalues d=1982(2)2014 {
append using "temp_`d'.dta"
}
foreach i in a b c d e f g h i j k {
append using "temp_2016_`i'.dta"
}
foreach i in a b c d e f g h i j k l m n {
append using "temp_2018_`i'.dta"
}

save "${dir}/Data/Matched Datasets/Company_PACs_Contributions_Matched.dta", replace


		***************************************
		***************************************
		*** merge with candidates' database ***
		***************************************
		***************************************

*** merge by bonicarid to get info on icpsr2 ***
insheet using "${dir}/Data/DIME_V3/dime_recipients_all_1979_2018.csv", clear
keep bonicarid icpsr2 icpsr cycle beforeswitchicpsr afterswitchicpsr name

/*
REPLACE THESE MISTAKES IN BONICA DATA: different icpsr2 and bonicarid assigned to the same person
*/
replace icpsr2="PA143065" if bonicarid=="cand142752"
replace bonicarid="cand101855" if bonicarid=="cand142752"
/*
REPLACE THESE MISTAKES IN BONICA DATA FOR CANDIDATES: same bonicarid assigned to 2 different candidates
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand1383_2" if bonicarid=="cand1383" & icpsr2=="10788"
replace bonicarid="cand767_2" if bonicarid=="cand767" & icpsr2=="10719"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & icpsr2=="21344"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="maloney, sean patrick"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="MALONEY, SEAN PATRICK"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & name=="friends of maloney"
replace icpsr2="21344" if bonicarid=="cand1318_2"
replace icpsr2=icpsr if bonicarid=="cand1267" & name=="BROWN, ALVIN"
replace icpsr2=icpsr if bonicarid=="cand1267" & name=="BROWN, PRINCE"
replace icpsr2=icpsr if bonicarid=="cand1267" & name=="brown, prince"
replace bonicarid="cand1267_2" if bonicarid=="cand1267" & name=="BROWN, ALVIN"
replace bonicarid="cand1267_3" if bonicarid=="cand1267" & name=="BROWN, PRINCE"
replace bonicarid="cand1267_3" if bonicarid=="cand1267" & name=="brown, prince"
replace icpsr2=icpsr if bonicarid=="cand43796" & name=="meng, matt"
replace bonicarid="cand43796_2" if bonicarid=="cand43796" & name=="meng, matt"
replace icpsr2=icpsr if bonicarid=="cand3314" & name=="santana, alberto l"
replace icpsr2=icpsr if bonicarid=="cand3314" & name=="santana, alberto"
replace bonicarid="cand3314_2" if bonicarid=="cand3314" & name=="santana, alberto l"
replace bonicarid="cand3314_2" if bonicarid=="cand3314" & name=="santana, alberto"
replace icpsr2="H4NY12029" if name=="VELAZQUEZ, PEDRO L" & bonicarid=="H4NY12029"
replace bonicarid="cand131777" if name=="VELAZQUEZ, PEDRO L" & bonicarid=="H4NY12029"
replace icpsr2="41700" if bonicarid=="cand131015" & name=="MASTO, CATHERINE CORTEZ"
replace bonicarid="cand83453" if bonicarid=="cand131015" & name=="MASTO, CATHERINE CORTEZ"
replace icpsr2="H8WY00072" if bonicarid=="cand105249" & name=="SHARRATT, BRYAN EDWARDS"
replace bonicarid="cand57085" if bonicarid=="cand105249" & name=="SHARRATT, BRYAN EDWARDS"
replace icpsr2=icpsr if bonicarid=="cand181237" & name=="bartlett, dewey follett"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & name=="bartlett, dewey follett"
replace icpsr2=icpsr if bonicarid=="cand181237" & name=="bartlett, dewey f jr"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & name=="bartlett, dewey f jr"
replace icpsr2=icpsr if bonicarid=="cand241" & name=="early, joseph d jr"
replace bonicarid="cand241_2" if bonicarid=="cand241" & name=="early, joseph d jr"
replace icpsr2=icpsr if bonicarid=="cand271" & name=="MCDONALD, KATHRYN JACKSON"
replace bonicarid="cand271_2" if bonicarid=="cand271" & name=="MCDONALD, KATHRYN JACKSON"
replace icpsr2=icpsr if bonicarid=="cand323" & name=="corcoran, thomas e"
replace bonicarid="cand323_2" if bonicarid=="cand323" & name=="corcoran, thomas e"
replace icpsr2=icpsr if bonicarid=="cand323" & name=="CORCORAN, THOMAS E"
replace bonicarid="cand323_2" if bonicarid=="cand323" & name=="CORCORAN, THOMAS E"
replace icpsr2=icpsr if bonicarid=="cand393" & name=="ATKINSON, FREDERICK WAYNE"
replace bonicarid="cand393_2" if bonicarid=="cand393" & name=="ATKINSON, FREDERICK WAYNE"
replace icpsr2="H0FL18017" if bonicarid=="cand507" & name=="EVANS, CHARLES RAY"
replace bonicarid="cand36009" if bonicarid=="cand507" & name=="EVANS, CHARLES RAY"
replace icpsr2=icpsr if bonicarid=="cand551" & name=="smith, dennis c w"
replace bonicarid="cand551_2" if bonicarid=="cand551" & name=="smith, dennis c w"
replace icpsr2=icpsr if bonicarid=="cand551" & name=="SMITH, DENNIS C W"
replace bonicarid="cand551_2" if bonicarid=="cand551" & name=="SMITH, DENNIS C W"
replace icpsr2=icpsr if bonicarid=="cand839" & name=="lynch, stephen a"
replace bonicarid="cand839_2" if bonicarid=="cand839" & name=="lynch, stephen a"
replace icpsr2="CA_CP1356482" if bonicarid=="cand1082" & name=="chu, kansen"
replace bonicarid="cand9996708" if bonicarid=="cand1082" & name=="chu, kansen"
replace icpsr2=icpsr if bonicarid=="cand1220" & name=="ANDREWS, CAMILLE SPINELLO"
replace bonicarid="cand1220_2" if bonicarid=="cand1220" & name=="ANDREWS, CAMILLE SPINELLO"
replace icpsr2=icpsr if bonicarid=="cand1407" & name=="stockman, draft"
replace bonicarid="cand1407_2" if bonicarid=="cand1407" & name=="stockman, draft"
replace icpsr2=icpsr if bonicarid=="cand1521" & name=="fletcher, wynetta wendy"
replace bonicarid="cand1521_2" if bonicarid=="cand1521" & name=="fletcher, wynetta wendy"
replace icpsr2=icpsr if bonicarid=="cand1620" & name=="faircloth, resson oliver jr"
replace bonicarid="cand1620_2" if bonicarid=="cand1620" & name=="faircloth, resson oliver jr"
replace icpsr2=icpsr if bonicarid=="cand3303" & name=="kearns, don"
replace bonicarid="cand3303_2" if bonicarid=="cand3303" & name=="kearns, don"

/*
REPLACE THESE MISTAKES IN BONICA DATA: different bonicarid assigned to the same person
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand44995" if bonicarid=="cand140862"
replace bonicarid="cand160674" if icpsr2=="21525" & cycle==2020

duplicates drop bonicarid, force
drop name cycle
save "temp_recipient.dta", replace


*** MERGE TEMP DATASET TO OBTAIN ICPSR2 ***

use "${dir}/Data/Matched Datasets/Company_PACs_Contributions_Matched.dta", clear

/*
REPLACE THESE MISTAKES IN BONICA DATA: different icpsr2 and bonicarid assigned to the same person
*/
replace bonicarid="cand101855" if bonicarid=="cand142752"
/*
REPLACE THESE MISTAKES IN BONICA DATA FOR CANDIDATES (bonicarid is different for these candidates between contributions datasests and dime_recipients_all_1979_2018, or it is missing in contributions datasets)
*/
replace bonicarid="cand118157" if bonicarid=="cand140869"
replace bonicarid="cand137579" if bonicarid=="cand141325"
replace bonicarid="cand101855" if bonicarid=="cand152881"
replace bonicarid="cand143446" if bonicarid=="cand27327"
replace bonicarid="cand35025" if recipientname=="PORTANTINO, ANTHONY"
replace bonicarid="cand35025" if recipientname=="portantino, anthony"
replace bonicarid="cand35215" if recipientname=="SIMITIAN, S. JOSEPH"
replace bonicarid="cand35215" if recipientname=="simitian, s joseph"
replace bonicarid="cand35215" if recipientname=="friends of simitian, "
replace bonicarid="cand35215" if recipientname=="simitian for california state assembly, "
replace bonicarid="cand35215" if recipientname=="simitian for state senate, "
replace bonicarid="cand809" if recipientname=="TAYLOR, GENE"
replace recipientstate="MS" if recipientname=="TAYLOR, GENE"
replace bonicarid="cand9998006" if recipientname=="estes, dawn"
replace bonicarid="cand28614" if recipientname=="jones, dave"
replace bonicarid="cand143845" if recipientname=="perez, john a"
replace bonicarid="cand9996636" if recipientname=="weinreb, brad"
replace bonicarid="cand9993355" if recipientname=="cohn, steve"
replace bonicarid="cand9999143" if recipientname=="johannessen, mark"
replace bonicarid="cand28713" if recipientname=="krekorian, paul"
/*
REPLACE THESE MISTAKES IN BONICA DATA FOR CANDIDATES: same bonicarid assigned to 2 different candidates
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand1383_2" if bonicarid=="cand1383" & recipientname=="JONES, WALTER"
replace bonicarid="cand767_2" if bonicarid=="cand767" & recipientname=="DUNCAN, JOHN J"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="MALONEY, SEAN PATRICK"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="SEAN PATRICK MALONEY FOR CONGRESS"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="friends of maloney"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="maloney, sean patrick"
replace bonicarid="cand1318_2" if bonicarid=="cand1318" & recipientname=="sean patrick maloney for congress"
replace bonicarid="cand1267_2" if bonicarid=="cand1267" & recipientname=="BROWN, ALVIN"
replace bonicarid="cand43796_2" if bonicarid=="cand43796" & recipientname=="meng, matt"
replace bonicarid="cand131777" if bonicarid=="H4NY12029" & recipientname=="VELAZQUEZ, PEDRO L"
replace bonicarid="cand83453" if bonicarid=="cand131015" & recipientname=="MASTO, CATHERINE CORTEZ"
replace bonicarid="cand57085" if bonicarid=="cand105249" & recipientname=="SHARRATT, BRYAN EDWARDS"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & recipientname=="bartlett, dewey follett"
replace bonicarid="cand181237_2" if bonicarid=="cand181237" & recipientname=="bartlett, dewey f jr"
replace bonicarid="cand241_2" if bonicarid=="cand241" & recipientname=="early, joseph d jr"
replace bonicarid="cand271_2" if bonicarid=="cand271" & recipientname=="MCDONALD, KATHRYN JACKSON"
replace bonicarid="cand323_2" if bonicarid=="cand323" & recipientname=="corcoran, thomas e"
replace bonicarid="cand323_2" if bonicarid=="cand323" & recipientname=="CORCORAN, THOMAS E"
replace bonicarid="cand393_2" if bonicarid=="cand393" & recipientname=="ATKINSON, FREDERICK WAYNE"
replace bonicarid="cand36009" if bonicarid=="cand507" & recipientname=="EVANS, CHARLES RAY"
replace bonicarid="cand551_2" if bonicarid=="cand551" & recipientname=="smith, dennis c w"
replace bonicarid="cand551_2" if bonicarid=="cand551" & recipientname=="SMITH, DENNIS C W"
replace bonicarid="cand839_2" if bonicarid=="cand839" & recipientname=="lynch, stephen a"
replace bonicarid="cand9996708" if bonicarid=="cand1082" & recipientname=="chu, kansen"
replace bonicarid="cand1220_2" if bonicarid=="cand1220" & recipientname=="ANDREWS, CAMILLE SPINELLO"
replace bonicarid="cand1407_2" if bonicarid=="cand1407" & recipientname=="stockman, draft"
replace bonicarid="cand1521_2" if bonicarid=="cand1521" & recipientname=="fletcher, wynetta wendy"
replace bonicarid="cand1620_2" if bonicarid=="cand1620" & recipientname=="faircloth, resson oliver jr"
replace bonicarid="cand3303_2" if bonicarid=="cand3303" & recipientname=="kearns, don"

/*
REPLACE THESE MISTAKES IN BONICA DATA: different bonicarid assigned to the same person
* NOTE: I do this only for candidate ever in Congress, so with a "proper" icpsr2
*/
replace bonicarid="cand44995" if bonicarid=="cand140862"

merge m:1 bonicarid using "temp_recipient.dta" /* Note: 0.1% of observations unmatched (missing bonicarid) */
drop if _m==2
drop _m

* Party switchers
replace icpsr2="20327" if icpsr2=="90327"
replace icpsr2="29440" if icpsr2=="29940"
replace icpsr2="29735" if icpsr2=="41105"
replace icpsr2="20101" if icpsr2=="41100"
replace icpsr2="20524" if icpsr2=="21169"
replace icpsr2="29542" if icpsr2=="99542"
replace icpsr2="29767" if icpsr2=="89767"
replace icpsr2="29767" if icpsr2=="99767"
replace icpsr2="20901" if icpsr2=="90901"
replace icpsr2="14828" if icpsr2=="94828"
replace icpsr2="29722" if icpsr2=="41103"
replace icpsr2="21144" if icpsr2=="20725"

* Info on CompanyID
joinby bonicacid using "${dir}/Data/Matched Datasets/Company_PACs_MATCHED.dta"

* Save datasets with all 1980-2018 contributions
save "${dir}/Data/Matched Datasets/Company_PACs_Contributions_Matched.dta", replace

* Keep only cycles starting since 2000
drop if cycle<2000

* Merge with information on sitting MCs
preserve
use "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", clear
keep icpsr2 cycle
duplicates drop icpsr2 cycle, force
save "temp_MCs.dta", replace
restore
merge m:1 icpsr2 cycle using "temp_MCs.dta"
keep if _m==3
drop _m

collapse (sum) amount, by(CompanyID cycle)

* Save datasets with all 2000-2018 contributions to sitting MCs only
save "${dir}/Data/Matched Datasets/Company_PACs_Contributions_Matched_MCs.dta", replace

forvalues d=1980(2)2014 {
erase "temp_`d'.dta"
}
foreach i in a b c d e f g h i j k {
erase "temp_2016_`i'.dta"
}
foreach i in a b c d e f g h i j k l m n {
erase "temp_2018_`i'.dta"
}
erase "${dir}/Data/Matched Datasets/Company_PACs_MATCHED_temp.dta"
erase "temp_recipient.dta"
erase "temp_MCs.dta"

cd "${dir}/Data"


		***********************************************************************************************
		***********************************************************************************************
		*** 				STEP 16:  Generate final datasets for the analysis				    	***
		***********************************************************************************************
		***********************************************************************************************

		* Note: the dataset is actually at the Director-Firm-MC-chamber-cycle level (not MC-cycle) since a handful of MC serve
		* in both house and senate in same cycle

* 1) Use committee assignment data to obtain list of all MCs serving in a cycle

use "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", clear
keep icpsr2 cycle chamber majority majority_leader majority_whip minority_leader minority_whip speaker_house
destring icpsr2, replace
save "temp.dta", replace


* 2) Since the final dataset with all variables will be too big, save useful information in a separate dataset

use "${dir}/Data/Boardex/Directors_to_Match_Career_Cleaned_SIC_CRPsector.dta", clear
keep DirectorID CompanyID cycle IndividualName CompanyName Sector CompanyISIN CompanyTicker CompanyIndex NationalityMix HOAddress1 HOAddress2 HOAddress3 HOAddress4 HOAddress5 HOURL cik gvkey sic Catcode catname catorder industry sector DirectorTypeEDorSD years_on_board_cycle Board top_mgmt
* create numeric code of string sector variables
egen Catcode_num=group(Catcode)
egen catorder_num=group(catorder)
* Keep only the observations with non-missing sector classification in CRP (only 63 companies, with a total of 4,806 observations deleted)
keep if Catcode!=""
save "temp_2.dta", replace
* Merge with info on corporate PACs' donations
merge m:1 CompanyID cycle using "${dir}/Data/Matched Datasets/Company_PACs_Contributions_Matched_MCs.dta"
drop if _m==2
replace amount=0 if _m==1
drop _m
rename amount amount_corporate_PAC
label var amount_corporate_PAC "Donations from corporate PAC"
save "${dir}/Data/Final Datasets/Directors_Information.dta", replace


* 3) Dataset of directors' contributions to MCs only

use "${dir}/Data/Federal Committee Assignmnents/committee_assignments_final.dta", clear
keep icpsr2 cycle
duplicates drop icpsr2 cycle, force
save "temp_3.dta", replace
use "${dir}/Data/Final Datasets/Contributions_Final.dta", clear
keep DirectorID icpsr2 cycle amount_noself 
merge m:1 icpsr2 cycle using "temp_3.dta"
keep if _m==3
drop _m
collapse (sum) amount_noself, by(DirectorID icpsr2 cycle)
replace amount_noself=0 if amount_noself<0
destring icpsr2, replace
save "${dir}/Data/Final Datasets/Contributions_Final_Committee_Analysis.dta", replace


* 4) Create final dataset at the Director-Firm-cycle-MC level

use "temp_2.dta", clear
drop IndividualName CompanyName Sector CompanyISIN CompanyTicker CompanyIndex NationalityMix HOAddress1 HOAddress2 HOAddress3 HOAddress4 HOAddress5 HOURL cik gvkey sic catname industry sector Catcode catorder DirectorTypeEDorSD years_on_board_cycle

* How many individuals in these companies? 401,557
unique DirectorID
* How many companies? 14,807
unique CompanyID
* How many observations for each cycle?
tab cycle
/*
      cycle |      Freq.     Percent        Cum.
------------+-----------------------------------
       2000 |     30,749        2.13        2.13
       2002 |     44,571        3.09        5.22
       2004 |     86,300        5.98       11.21
       2006 |    111,636        7.74       18.95
       2008 |    139,681        9.69       28.63
       2010 |    137,904        9.56       38.19
       2012 |    158,713       11.00       49.20
       2014 |    208,486       14.46       63.66
       2016 |    266,218       18.46       82.12
       2018 |    257,934       17.88      100.00
------------+-----------------------------------
      Total |  1,442,192      100.00

*/
* How many companies for each cycle?
preserve
duplicates drop CompanyID cycle, force
tab cycle
restore
/*
      cycle |      Freq.     Percent        Cum.
------------+-----------------------------------
       2000 |      1,544        2.54        2.54
       2002 |      1,967        3.24        5.77
       2004 |      4,586        7.54       13.32
       2006 |      5,478        9.01       22.33
       2008 |      6,782       11.15       33.48
       2010 |      6,258       10.29       43.77
       2012 |      6,836       11.24       55.02
       2014 |      8,193       13.48       68.49
       2016 |      9,920       16.32       84.81
       2018 |      9,237       15.19      100.00
------------+-----------------------------------
      Total |     60,801      100.00

*/
* How many individuals for each cycle?
preserve
duplicates drop DirectorID cycle, force
tab cycle
restore
/*
      cycle |      Freq.     Percent        Cum.
------------+-----------------------------------
       2000 |     27,690        2.19        2.19
       2002 |     39,612        3.13        5.32
       2004 |     76,029        6.01       11.32
       2006 |     96,198        7.60       18.92
       2008 |    121,420        9.59       28.51
       2010 |    121,226        9.58       38.08
       2012 |    139,140       10.99       49.07
       2014 |    182,328       14.40       63.48
       2016 |    234,134       18.49       81.97
       2018 |    228,275       18.03      100.00
------------+-----------------------------------
      Total |  1,266,052      100.00

*/
* For how many cycles do individuals appear in a company?
preserve
bys DirectorID CompanyID: gen N=_N
duplicates drop DirectorID CompanyID, force
tab N
restore
/*
          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    152,561       28.23       28.23
          2 |    168,144       31.11       59.34
          3 |     98,687       18.26       77.60
          4 |     46,637        8.63       86.23
          5 |     26,896        4.98       91.21
          6 |     20,557        3.80       95.01
          7 |     11,901        2.20       97.21
          8 |      8,882        1.64       98.86
          9 |      3,271        0.61       99.46
         10 |      2,911        0.54      100.00
------------+-----------------------------------
      Total |    540,447      100.00

*/
* For how many cycles do individuals appear in a sector?
preserve
duplicates drop DirectorID catorder_num cycle, force
bys DirectorID catorder_num: gen N=_N
duplicates drop DirectorID catorder_num, force
tab N
restore
/*

          N |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |    113,030       24.21       24.21
          2 |    138,215       29.61       53.82
          3 |     88,368       18.93       72.75
          4 |     44,740        9.58       82.34
          5 |     27,200        5.83       88.17
          6 |     22,684        4.86       93.02
          7 |     13,508        2.89       95.92
          8 |     10,811        2.32       98.23
          9 |      4,326        0.93       99.16
         10 |      3,915        0.84      100.00
------------+-----------------------------------
      Total |    466,797      100.00

*/

* Joinby by cycle icpsr2
joinby cycle using "temp.dta"
merge m:1 DirectorID icpsr2 cycle using "${dir}/Data/Final Datasets/Contributions_Final_Committee_Analysis.dta"
drop if _m==2
replace amount_noself=0 if _m==1
drop _m  
save "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level.dta", replace

erase "temp.dta"
erase "temp_2.dta"



* 5) Merge final dataset with definition 2 using Subcommittee coding as well

use "${dir}/Data/Final Datasets/Directors_Information.dta", clear
keep catorder catorder_num
duplicates drop catorder, force
save "temp.dta", replace
use "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_2_SUB.dta", clear
keep icpsr2 cycle chamber catorder chairman ranking_minority
destring icpsr2, replace
merge m:1 catorder using "temp.dta"
drop if _m==1
drop _m
drop catorder
save "temp.dta", replace
use "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level.dta", clear
merge m:1 icpsr2 cycle chamber catorder_num using "temp.dta"
drop if _m==2
gen relevant=(_m==3)
drop _m
save "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_DEF2.dta", replace


* 6) Merge final dataset with definition 1 using Subcommittee coding as well

use "${dir}/Data/Final Datasets/Directors_Information.dta", clear
keep catorder catorder_num
duplicates drop catorder, force
save "temp.dta", replace
use "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_1_SUB.dta", clear
keep icpsr2 cycle chamber catorder chairman ranking_minority
destring icpsr2, replace
merge m:1 catorder using "temp.dta"
drop if _m==1 
drop _m
drop catorder
save "temp.dta", replace
use "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level.dta", clear
merge m:1 icpsr2 cycle chamber catorder_num using "temp.dta"
drop if _m==2
gen relevant=(_m==3)
drop _m
save "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_DEF1.dta", replace


* 7) Merge final dataset with definition in Ovtchinnikov and Pantaleoni (2012)

use "${dir}/Data/Final Datasets/Directors_Information.dta", clear
keep catorder catorder_num
duplicates drop catorder, force
save "temp.dta", replace
use "${dir}/Data/Final Datasets/MC_cycle_industry_match_Definition_ovtchinnikov_and_pantaleoni_2012.dta", clear
keep icpsr2 cycle chamber catorder chairman ranking_minority
destring icpsr2, replace
merge m:1 catorder using "temp.dta"
drop if _m==1 
drop _m
drop catorder
save "temp.dta", replace
use "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level.dta", clear
merge m:1 icpsr2 cycle chamber catorder_num using "temp.dta"
drop if _m==2
gen relevant=(_m==3)
drop _m
save "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_ovtchinnikov_and_pantaleoni_2012.dta", replace


* 8) Dataset for event study - entry

* Dataset for event study - entry - Company level - House and Senate - chamber level

use DirectorID CompanyID cycle icpsr2 chamber relevant amount_noself using "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_DEF2.dta", clear

gen donated=(amount_noself>0)

sort DirectorID CompanyID icpsr2 chamber cycle
by DirectorID CompanyID icpsr2 chamber: gen event_0=(relevant==1 & relevant[_n-1]==0 & relevant[_n-2]==0 & relevant[_n-3]==0)

save "temp.dta", replace

set more off

forvalues cycle=2006(2)2018 {

use "temp.dta", clear

keep if cycle>=`cycle'-6 & cycle<=`cycle'

gen t=(cycle==`cycle' & event_0==1)
bys DirectorID CompanyID icpsr2 chamber: egen treated=max(t)
gen control_t=(relevant==0 & treated==0)
drop t 
keep if control_t==1 | treated==1
bys DirectorID CompanyID icpsr2 chamber: gen N=_N
keep if N==4
drop N
keep DirectorID CompanyID cycle icpsr2 chamber relevant donated treated
gen period=.
replace period=1 if cycle==`cycle'-6
replace period=2 if cycle==`cycle'-4
replace period=3 if cycle==`cycle'-2
replace period=4 if cycle==`cycle'
gen treated_p1=(treated==1 & period==1)
gen treated_p2=(treated==1 & period==2)
gen treated_p3=(treated==1 & period==3)
gen treated_p4=(treated==1 & period==4)
gen event=`cycle'
save "temp_event`cycle'.dta", replace

}

use "temp_event2006.dta", clear
append using "temp_event2008.dta"
append using "temp_event2010.dta"
append using "temp_event2012.dta"
append using "temp_event2014.dta"
append using "temp_event2016.dta"
append using "temp_event2018.dta"

save "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_DEF2_EventStudyAppointment_CompanyLevel_HouseSenate_2.dta", replace


* 9) Dataset for event study - exit

* Dataset for event study - exit - Company level - House and Senate - chamber level

use DirectorID CompanyID cycle icpsr2 chamber relevant amount_noself using "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_DEF2.dta", clear

gen donated=(amount_noself>0)

sort DirectorID CompanyID icpsr2 chamber cycle
by DirectorID CompanyID icpsr2 chamber: gen event_0=(relevant==0 & relevant[_n-1]==1 & relevant[_n-2]==1 & relevant[_n-3]==1)

save "temp.dta", replace

set more off

forvalues cycle=2006(2)2018 {

use "temp.dta", clear

keep if cycle>=`cycle'-6 & cycle<=`cycle'

gen t=(cycle==`cycle' & event_0==1)
bys DirectorID CompanyID icpsr2 chamber: egen treated=max(t)
gen control_t=(relevant==1 & treated==0)
drop t 
keep if control_t==1 | treated==1
bys DirectorID CompanyID icpsr2 chamber: gen N=_N
keep if N==4
drop N
keep DirectorID CompanyID cycle icpsr2 chamber relevant donated treated
gen period=.
replace period=1 if cycle==`cycle'-6
replace period=2 if cycle==`cycle'-4
replace period=3 if cycle==`cycle'-2
replace period=4 if cycle==`cycle'
gen treated_p1=(treated==1 & period==1)
gen treated_p2=(treated==1 & period==2)
gen treated_p3=(treated==1 & period==3)
gen treated_p4=(treated==1 & period==4)
gen event=`cycle'
save "temp_event`cycle'.dta", replace

}

use "temp_event2006.dta", clear
append using "temp_event2008.dta"
append using "temp_event2010.dta"
append using "temp_event2012.dta"
append using "temp_event2014.dta"
append using "temp_event2016.dta"
append using "temp_event2018.dta"

save "${dir}/Data/Final Datasets/Final_Data_Director_MC_Level_DEF2_EventStudyExit_CompanyLevel_HouseSenate_2.dta", replace



* 8) Auxiliary dataset for lobbying analysis

insheet using "${dir}/Data/AUX/dataset___client_level.csv", clear 
keep client_uuid gvkey
tempfile temp
save "`temp'" , replace
insheet using "${dir}/Data/AUX/dataset___report_level.csv", clear
merge m:1 client_uuid using "`temp'"
drop if _m==2
drop _m
gen cycle=.
replace cycle=2000 if report_year==1999 | report_year==2000
replace cycle=2002 if report_year==2001 | report_year==2002
replace cycle=2004 if report_year==2003 | report_year==2004
replace cycle=2006 if report_year==2005 | report_year==2006
replace cycle=2008 if report_year==2007 | report_year==2008
replace cycle=2010 if report_year==2009 | report_year==2010
replace cycle=2012 if report_year==2011 | report_year==2012
replace cycle=2014 if report_year==2013 | report_year==2014
replace cycle=2016 if report_year==2015 | report_year==2016
replace cycle=2018 if report_year==2017 | report_year==2018
drop report_year
keep if cycle!=.
keep if is_no_activity=="f"
keep cycle gvkey
duplicates drop gvkey cycle, force
tempfile temp
save "`temp'" , replace
use "${dir}/Data/Final Datasets/Directors_Information.dta", clear
keep CompanyID gvkey cycle
destring gvkey , replace
duplicates drop CompanyID cycle, force
keep if gvkey!=.
merge m:1 gvkey cycle using "`temp'"
drop if _m==2
gen lobbying_firm=(_m==3)
drop _m gvkey
save "${dir}/Data/AUX/lobbying_firms_list_cycle.dta", replace







